#10 - Creating auxiliary client class to generate configuration class (#78)

* #10 - Creating auxiliary client class to generate configuration class based on cachet's component list.

* Updating the python version in codacy to reduce false positives

* Moving some of the cachet operations to the client class to clean up the configuration class and making better constants

* Refactoring status to have proper classes and adding more tests. Refactoring the requests tests to use requests-mock.

* Removing unused imports from test_scheduler

* Adding more tests and the ability to run the client from command line

* Updating README and client arg parsing

* Fixing broken unit tests
This commit is contained in:
mtakaki
2020-01-18 13:55:07 -08:00
committed by GitHub
parent bcafbd64f7
commit 5be0217c00
13 changed files with 530 additions and 269 deletions

View File

@@ -9,8 +9,10 @@ import time
import requests
from yaml import dump
import cachet_url_monitor.latency_unit as latency_unit
import cachet_url_monitor.status as st
from cachet_url_monitor.client import CachetClient, normalize_url
from cachet_url_monitor.exceptions import MetricNonexistentError
from cachet_url_monitor.status import ComponentStatus
# This is the mandatory fields that must be in the configuration file in this
# same exact structure.
@@ -27,58 +29,17 @@ class ConfigurationValidationError(Exception):
return repr(self.value)
class ComponentNonexistentError(Exception):
"""Exception raised when the component does not exist."""
def __init__(self, component_id):
self.component_id = component_id
def __str__(self):
return repr(f'Component with id [{self.component_id}] does not exist.')
class MetricNonexistentError(Exception):
"""Exception raised when the component does not exist."""
def __init__(self, metric_id):
self.metric_id = metric_id
def __str__(self):
return repr(f'Metric with id [{self.metric_id}] does not exist.')
def get_current_status(endpoint_url, component_id, headers):
"""Retrieves the current status of the component that is being monitored. It will fail if the component does
not exist or doesn't respond with the expected data.
:return component status.
"""
get_status_request = requests.get(f'{endpoint_url}/components/{component_id}', headers=headers)
if get_status_request.ok:
# The component exists.
return int(get_status_request.json()['data']['status'])
else:
raise ComponentNonexistentError(component_id)
def normalize_url(url):
"""If passed url doesn't include schema return it with default one - http."""
if not url.lower().startswith('http'):
return f'http://{url}'
return url
class Configuration(object):
"""Represents a configuration file, but it also includes the functionality
of assessing the API and pushing the results to cachet.
"""
def __init__(self, config_file, endpoint_index):
self.endpoint_index = endpoint_index
self.data = config_file
def __init__(self, config, endpoint_index: int):
self.endpoint_index: int = endpoint_index
self.data = config
self.endpoint = self.data['endpoints'][endpoint_index]
self.current_fails = 0
self.trigger_update = True
self.current_fails: int = 0
self.trigger_update: bool = True
if 'name' not in self.endpoint:
# We have to make this mandatory, otherwise the logs are confusing when there are multiple URLs.
@@ -93,11 +54,11 @@ class Configuration(object):
self.validate()
# We store the main information from the configuration file, so we don't keep reading from the data dictionary.
self.headers = {'X-Cachet-Token': os.environ.get('CACHET_TOKEN') or self.data['cachet']['token']}
self.token = os.environ.get('CACHET_TOKEN') or self.data['cachet']['token']
self.headers = {'X-Cachet-Token': self.token}
self.endpoint_method = self.endpoint['method']
self.endpoint_url = self.endpoint['url']
self.endpoint_url = normalize_url(self.endpoint_url)
self.endpoint_url = normalize_url(self.endpoint['url'])
self.endpoint_timeout = self.endpoint.get('timeout') or 1
self.endpoint_header = self.endpoint.get('header') or None
self.allowed_fails = self.endpoint.get('allowed_fails') or 0
@@ -106,6 +67,8 @@ class Configuration(object):
self.component_id = self.endpoint['component_id']
self.metric_id = self.endpoint.get('metric_id')
self.client = CachetClient(self.api_url, self.token)
if self.metric_id is not None:
self.default_metric_value = self.get_default_metric_value(self.metric_id)
@@ -113,8 +76,9 @@ class Configuration(object):
self.latency_unit = self.data['cachet'].get('latency_unit') or 's'
# We need the current status so we monitor the status changes. This is necessary for creating incidents.
self.status = get_current_status(self.api_url, self.component_id, self.headers)
self.status = self.client.get_component_status(self.component_id)
self.previous_status = self.status
self.logger.info(f'Component current status: {self.status}')
# Get remaining settings
self.public_incidents = int(self.endpoint['public_incidents'])
@@ -178,27 +142,27 @@ class Configuration(object):
except requests.ConnectionError:
self.message = 'The URL is unreachable: %s %s' % (self.endpoint_method, self.endpoint_url)
self.logger.warning(self.message)
self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE
self.status = st.ComponentStatus.PARTIAL_OUTAGE
return
except requests.HTTPError:
self.message = 'Unexpected HTTP response'
self.logger.exception(self.message)
self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE
self.status = st.ComponentStatus.PARTIAL_OUTAGE
return
except requests.Timeout:
except (requests.Timeout, requests.ConnectTimeout):
self.message = 'Request timed out'
self.logger.warning(self.message)
self.status = st.COMPONENT_STATUS_PERFORMANCE_ISSUES
self.status = st.ComponentStatus.PERFORMANCE_ISSUES
return
# We initially assume the API is healthy.
self.status = st.COMPONENT_STATUS_OPERATIONAL
self.status: ComponentStatus = st.ComponentStatus.OPERATIONAL
self.message = ''
for expectation in self.expectations:
status = expectation.get_status(self.request)
status: ComponentStatus = expectation.get_status(self.request)
# The greater the status is, the worse the state of the API is.
if status > self.status:
if status.value > self.status.value:
self.status = status
self.message = expectation.get_message(self.request)
self.logger.info(self.message)
@@ -220,7 +184,7 @@ class Configuration(object):
and only for non-operational ones above the configured threshold (allowed_fails).
"""
if self.status != 1:
if self.status != st.ComponentStatus.OPERATIONAL:
self.current_fails = self.current_fails + 1
self.logger.warning(f'Failure #{self.current_fails} with threshold set to {self.allowed_fails}')
if self.current_fails <= self.allowed_fails:
@@ -234,27 +198,30 @@ class Configuration(object):
status based on the previous call to evaluate().
"""
if self.previous_status == self.status:
# We don't want to keep spamming if there's no change in status.
self.logger.info(f'No changes to component status.')
self.trigger_update = False
return
self.previous_status = self.status
if not self.trigger_update:
return
self.api_component_status = get_current_status(self.api_url, self.component_id, self.headers)
api_component_status = self.client.get_component_status(self.component_id)
if self.status == self.api_component_status:
if self.status == api_component_status:
return
self.status = api_component_status
params = {'id': self.component_id, 'status': self.status}
component_request = requests.put('%s/components/%d' % (self.api_url, self.component_id), params=params,
headers=self.headers)
component_request = self.client.push_status(self.component_id, self.status)
if component_request.ok:
# Successful update
self.logger.info('Component update: status [%d]' % (self.status,))
self.logger.info(f'Component update: status [{self.status}]')
else:
# Failed to update the API status
self.logger.warning('Component update failed with status [%d]: API'
' status: [%d]' % (component_request.status_code, self.status))
self.logger.warning(f'Component update failed with HTTP status: {component_request.status_code}. API'
f' status: {self.status}')
def push_metrics(self):
"""Pushes the total amount of seconds the request took to get a response from the URL.
@@ -263,16 +230,11 @@ class Configuration(object):
"""
if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'):
# We convert the elapsed time from the request, in seconds, to the configured unit.
value = self.default_metric_value if self.status != 1 else latency_unit.convert_to_unit(self.latency_unit,
self.request.elapsed.total_seconds())
params = {'id': self.metric_id, 'value': value,
'timestamp': self.current_timestamp}
metrics_request = requests.post('%s/metrics/%d/points' % (self.api_url, self.metric_id), params=params,
headers=self.headers)
metrics_request = self.client.push_metrics(self.metric_id, self.latency_unit,
self.request.elapsed.total_seconds(), self.current_timestamp)
if metrics_request.ok:
# Successful metrics upload
self.logger.info('Metric uploaded: %.6f %s' % (value, self.latency_unit))
self.logger.info('Metric uploaded: %.6f %s' % (self.request.elapsed.total_seconds(), self.latency_unit))
else:
self.logger.warning(f'Metric upload failed with status [{metrics_request.status_code}]')
@@ -282,14 +244,10 @@ class Configuration(object):
"""
if not self.trigger_update:
return
if hasattr(self, 'incident_id') and self.status == st.COMPONENT_STATUS_OPERATIONAL:
# If the incident already exists, it means it was unhealthy but now it's healthy again.
params = {'status': 4, 'visible': self.public_incidents, 'component_id': self.component_id,
'component_status': self.status,
'notify': True}
if hasattr(self, 'incident_id') and self.status == st.ComponentStatus.OPERATIONAL:
incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id,
previous_incident_id=self.incident_id)
incident_request = requests.put(f'{self.api_url}/incidents/{self.incident_id}', params=params,
headers=self.headers)
if incident_request.ok:
# Successful metrics upload
self.logger.info(
@@ -298,11 +256,9 @@ class Configuration(object):
else:
self.logger.warning(
f'Incident update failed with status [{incident_request.status_code}], message: "{self.message}"')
elif not hasattr(self, 'incident_id') and self.status != st.COMPONENT_STATUS_OPERATIONAL:
# This is the first time the incident is being created.
params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': self.public_incidents,
'component_id': self.component_id, 'component_status': self.status, 'notify': True}
incident_request = requests.post(f'{self.api_url}/incidents', params=params, headers=self.headers)
elif not hasattr(self, 'incident_id') and self.status != st.ComponentStatus.OPERATIONAL:
incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id,
message=self.message)
if incident_request.ok:
# Successful incident upload.
self.incident_id = incident_request.json()['data']['id']
@@ -338,20 +294,20 @@ class Expectation(object):
self.incident_status = self.parse_incident_status(configuration)
@abc.abstractmethod
def get_status(self, response):
def get_status(self, response) -> ComponentStatus:
"""Returns the status of the API, following cachet's component status
documentation: https://docs.cachethq.io/docs/component-statuses
"""
@abc.abstractmethod
def get_message(self, response):
def get_message(self, response) -> str:
"""Gets the error message."""
@abc.abstractmethod
def get_default_incident(self):
"""Returns the default status when this incident happens."""
def parse_incident_status(self, configuration):
def parse_incident_status(self, configuration) -> ComponentStatus:
return st.INCIDENT_MAP.get(configuration.get('incident', None), self.get_default_incident())
@@ -374,14 +330,14 @@ class HttpStatus(Expectation):
# We shouldn't look into more than one value, as this is a range value.
return int(statuses[0]), int(statuses[1])
def get_status(self, response):
def get_status(self, response) -> ComponentStatus:
if self.status_range[0] <= response.status_code < self.status_range[1]:
return st.COMPONENT_STATUS_OPERATIONAL
return st.ComponentStatus.OPERATIONAL
else:
return self.incident_status
def get_default_incident(self):
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
return st.ComponentStatus.PARTIAL_OUTAGE
def get_message(self, response):
return f'Unexpected HTTP status ({response.status_code})'
@@ -395,14 +351,14 @@ class Latency(Expectation):
self.threshold = configuration['threshold']
super(Latency, self).__init__(configuration)
def get_status(self, response):
def get_status(self, response) -> ComponentStatus:
if response.elapsed.total_seconds() <= self.threshold:
return st.COMPONENT_STATUS_OPERATIONAL
return st.ComponentStatus.OPERATIONAL
else:
return self.incident_status
def get_default_incident(self):
return st.COMPONENT_STATUS_PERFORMANCE_ISSUES
return st.ComponentStatus.PERFORMANCE_ISSUES
def get_message(self, response):
return 'Latency above threshold: %.4f seconds' % (response.elapsed.total_seconds(),)
@@ -417,14 +373,14 @@ class Regex(Expectation):
self.regex = re.compile(configuration['regex'], re.UNICODE + re.DOTALL)
super(Regex, self).__init__(configuration)
def get_status(self, response):
def get_status(self, response) -> ComponentStatus:
if self.regex.match(response.text):
return st.COMPONENT_STATUS_OPERATIONAL
return st.ComponentStatus.OPERATIONAL
else:
return self.incident_status
def get_default_incident(self):
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
return st.ComponentStatus.PARTIAL_OUTAGE
def get_message(self, response):
return 'Regex did not match anything in the body'