From 5be0217c00b9d42bf84f0370bfd7610707bc56a0 Mon Sep 17 00:00:00 2001 From: mtakaki Date: Sat, 18 Jan 2020 13:55:07 -0800 Subject: [PATCH] #10 - Creating auxiliary client class to generate configuration class (#78) * #10 - Creating auxiliary client class to generate configuration class based on cachet's component list. * Updating the python version in codacy to reduce false positives * Moving some of the cachet operations to the client class to clean up the configuration class and making better constants * Refactoring status to have proper classes and adding more tests. Refactoring the requests tests to use requests-mock. * Removing unused imports from test_scheduler * Adding more tests and the ability to run the client from command line * Updating README and client arg parsing * Fixing broken unit tests --- .codacy.yml | 5 + README.md | 34 +++++- cachet_url_monitor/client.py | 143 +++++++++++++++++++++++ cachet_url_monitor/configuration.py | 154 +++++++++--------------- cachet_url_monitor/exceptions.py | 19 +++ cachet_url_monitor/scheduler.py | 44 ++++--- cachet_url_monitor/status.py | 28 +++-- dev_requirements.txt | 1 + requirements.txt | 1 + tests/test_client.py | 147 +++++++++++++++++++++++ tests/test_configuration.py | 175 ++++++++-------------------- tests/test_expectation.py | 13 ++- tests/test_scheduler.py | 35 +++++- 13 files changed, 530 insertions(+), 269 deletions(-) create mode 100644 .codacy.yml create mode 100644 cachet_url_monitor/client.py create mode 100644 cachet_url_monitor/exceptions.py create mode 100644 tests/test_client.py diff --git a/.codacy.yml b/.codacy.yml new file mode 100644 index 0000000..9287a6a --- /dev/null +++ b/.codacy.yml @@ -0,0 +1,5 @@ +--- +engines: + pylint: + enabled: true + python_version: 3 diff --git a/README.md b/README.md index da1a8d9..c0cc2f1 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ By choosing any of the aforementioned statuses, it will let you control the kind The application should be installed 
using **virtualenv**, through the following command: -``` +```bash $ git clone https://github.com/mtakaki/cachet-url-monitor.git $ virtualenv cachet-url-monitor $ cd cachet-url-monitor @@ -117,7 +117,7 @@ $ python3 setup.py install To start the agent: -``` +```bash $ python3 cachet_url_monitor/scheduler.py config.yml ``` @@ -127,22 +127,46 @@ You can run the agent in docker, so you won't need to worry about installing pyt You have two choices, checking this repo out and building the docker image or it can be pulled directly from [dockerhub](https://hub.docker.com/r/mtakaki/cachet-url-monitor/). You will need to create your own custom `config.yml` file and run (it will pull latest): -``` +```bash $ docker pull mtakaki/cachet-url-monitor $ docker run --rm -it -v "$PWD":/usr/src/app/config/ mtakaki/cachet-url-monitor ``` If you're going to use a file with a name other than `config.yml`, you will need to map the local file, like this: -``` +```bash $ docker run --rm -it -v "$PWD"/my_config.yml:/usr/src/app/config/config.yml:ro mtakaki/cachet-url-monitor ``` +## Generating configuration from existing CachetHQ instance (since 0.6.1) + +In order to expedite the creation of your configuration file, you can use the client to automatically scrape the CachetHQ instance and spit out a YAML file. It can be used like this: +```bash +$ python cachet_url_monitor/client.py http://localhost/api/v1 my-token test.yml +``` +Or from docker (you will end up with a `test.yml` in your `$PWD/tmp` folder): +```bash +$ docker run --rm -it -v $PWD/tmp:/home/tmp/ mtakaki/cachet-url-monitor python3.7 ./cachet_url_monitor/client.py http://localhost/api/v1 my-token /home/tmp/test.yml +``` +The arguments are: +- **URL**, the CachetHQ API URL, so that means appending `/api/v1` to your hostname. +- **token**, the token that has access to your CachetHQ instance. +- **filename**, the file where it should write the configuration. 
+ +### Caveats +Because we can't predict what expectations will be needed, it will default to the following behavior: +- Verify a [200-300[ HTTP status range. +- If the status check fails, make the incident major and public. +- Frequency of 30 seconds. +- `GET` request. +- Timeout of 1s. +- We'll read the `link` field from the components and use it as the URL. + ## Troubleshooting ### SSLERROR If it's throwing the following exception: -``` +```python raise SSLError(e, request=request) requests.exceptions.SSLError: HTTPSConnectionPool(host='redacted', port=443): Max retries exceeded with url: /api/v1/components/19 (Caused by SSLError(SSLError(1, u'[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:579)'),)) ``` diff --git a/cachet_url_monitor/client.py b/cachet_url_monitor/client.py new file mode 100644 index 0000000..d4a0849 --- /dev/null +++ b/cachet_url_monitor/client.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python +from typing import Dict + +import click +import requests +from yaml import dump +from cachet_url_monitor import latency_unit, status, exceptions + + +def normalize_url(url: str) -> str: + """If passed url doesn't include schema return it with default one - http.""" + if not url.lower().startswith('http'): + return f'http://{url}' + return url + + +def save_config(config_map, filename: str): + with open(filename, 'w') as file: + dump(config_map, file) + + +class CachetClient(object): + """Utility class to interact with CachetHQ server.""" + url: str + token: str + headers: Dict[str, str] + + def __init__(self, url: str, token: str): + self.url = normalize_url(url) + self.token = token + self.headers = {'X-Cachet-Token': token} + + def get_components(self): + """Retrieves all components registered in cachet-hq""" + return requests.get(f"{self.url}/components", headers=self.headers).json()['data'] + + def get_metrics(self): + """Retrieves all metrics registered in cachet-hq""" + return requests.get(f"{self.url}/metrics", headers=self.headers).json()['data'] + 
def generate_config(self): + components = self.get_components() + generated_endpoints = [ + { + 'name': component['name'], + 'url': component['link'], + 'method': 'GET', + 'timeout': 1, + 'expectation': [ + { + 'type': 'HTTP_STATUS', + 'status_range': '200-300', + 'incident': 'MAJOR' + } + ], + 'allowed_fails': 0, + 'frequency': 30, + 'component_id': component['id'], + 'action': [ + 'CREATE_INCIDENT', + 'UPDATE_STATUS', + ], + 'public_incidents': True, + } for component in components if component['enabled'] + ] + generated_config = { + 'cachet': { + 'api_url': self.url, + 'token': self.token, + }, + 'endpoints': generated_endpoints + } + return generated_config + + def get_default_metric_value(self, metric_id): + """Returns default value for configured metric.""" + get_metric_request = requests.get(f"{self.url}/metrics/{metric_id}", headers=self.headers) + + if get_metric_request.ok: + return get_metric_request.json()['data']['default_value'] + else: + raise exceptions.MetricNonexistentError(metric_id) + + def get_component_status(self, component_id): + """Retrieves the current status of the given component. It will fail if the component does + not exist or doesn't respond with the expected data. + :return component status. + """ + get_status_request = requests.get(f'{self.url}/components/{component_id}', headers=self.headers) + + if get_status_request.ok: + # The component exists. + return status.ComponentStatus(int(get_status_request.json()['data']['status'])) + else: + raise exceptions.ComponentNonexistentError(component_id) + + def push_status(self, component_id, component_status): + """Pushes the status of the component to the cachet server. 
+ """ + params = {'id': component_id, 'status': component_status} + return requests.put(f"{self.url}/components/{component_id}", params=params, headers=self.headers) + + def push_metrics(self, metric_id, latency_time_unit, elapsed_time_in_seconds, timestamp): + """Pushes the total amount of seconds the request took to get a response from the URL. + """ + value = latency_unit.convert_to_unit(latency_time_unit, elapsed_time_in_seconds) + params = {'id': metric_id, 'value': value, 'timestamp': timestamp} + return requests.post(f"{self.url}/metrics/{metric_id}/points", params=params, headers=self.headers) + + def push_incident(self, status_value: status.ComponentStatus, is_public_incident: bool, component_id: int, + previous_incident_id=None, message=None): + """If the component status has changed, we create a new incident (if this is the first time it becomes unstable) + or updates the existing incident once it becomes healthy again. + """ + if previous_incident_id and status_value == status.ComponentStatus.OPERATIONAL: + # If the incident already exists, it means it was unhealthy but now it's healthy again. + params = {'status': status.IncidentStatus.FIXED.value, 'visible': is_public_incident, + 'component_id': component_id, 'component_status': status_value.value, 'notify': True} + + return requests.put(f'{self.url}/incidents/{previous_incident_id}', params=params, headers=self.headers) + elif not previous_incident_id and status_value != status.ComponentStatus.OPERATIONAL: + # This is the first time the incident is being created. 
+ params = {'name': 'URL unavailable', 'message': message, + 'status': status.IncidentStatus.INVESTIGATING.value, + 'visible': is_public_incident, 'component_id': component_id, 'component_status': status_value, + 'notify': True} + return requests.post(f'{self.url}/incidents', params=params, headers=self.headers) + +@click.group() +def run_client(): + pass + +@click.command() +@click.argument('url') +@click.argument('token') +@click.argument('output') +def run_client(url, token, output): + client = CachetClient(url, token) + config = client.generate_config() + save_config(config, output) + +if __name__ == '__main__': + run_client() diff --git a/cachet_url_monitor/configuration.py b/cachet_url_monitor/configuration.py index dcd5ce2..307f3bf 100644 --- a/cachet_url_monitor/configuration.py +++ b/cachet_url_monitor/configuration.py @@ -9,8 +9,10 @@ import time import requests from yaml import dump -import cachet_url_monitor.latency_unit as latency_unit import cachet_url_monitor.status as st +from cachet_url_monitor.client import CachetClient, normalize_url +from cachet_url_monitor.exceptions import MetricNonexistentError +from cachet_url_monitor.status import ComponentStatus # This is the mandatory fields that must be in the configuration file in this # same exact structure. 
@@ -27,58 +29,17 @@ class ConfigurationValidationError(Exception): return repr(self.value) -class ComponentNonexistentError(Exception): - """Exception raised when the component does not exist.""" - - def __init__(self, component_id): - self.component_id = component_id - - def __str__(self): - return repr(f'Component with id [{self.component_id}] does not exist.') - - -class MetricNonexistentError(Exception): - """Exception raised when the component does not exist.""" - - def __init__(self, metric_id): - self.metric_id = metric_id - - def __str__(self): - return repr(f'Metric with id [{self.metric_id}] does not exist.') - - -def get_current_status(endpoint_url, component_id, headers): - """Retrieves the current status of the component that is being monitored. It will fail if the component does - not exist or doesn't respond with the expected data. - :return component status. - """ - get_status_request = requests.get(f'{endpoint_url}/components/{component_id}', headers=headers) - - if get_status_request.ok: - # The component exists. - return int(get_status_request.json()['data']['status']) - else: - raise ComponentNonexistentError(component_id) - - -def normalize_url(url): - """If passed url doesn't include schema return it with default one - http.""" - if not url.lower().startswith('http'): - return f'http://{url}' - return url - - class Configuration(object): """Represents a configuration file, but it also includes the functionality of assessing the API and pushing the results to cachet. 
""" - def __init__(self, config_file, endpoint_index): - self.endpoint_index = endpoint_index - self.data = config_file + def __init__(self, config, endpoint_index: int): + self.endpoint_index: int = endpoint_index + self.data = config self.endpoint = self.data['endpoints'][endpoint_index] - self.current_fails = 0 - self.trigger_update = True + self.current_fails: int = 0 + self.trigger_update: bool = True if 'name' not in self.endpoint: # We have to make this mandatory, otherwise the logs are confusing when there are multiple URLs. @@ -93,11 +54,11 @@ class Configuration(object): self.validate() # We store the main information from the configuration file, so we don't keep reading from the data dictionary. - self.headers = {'X-Cachet-Token': os.environ.get('CACHET_TOKEN') or self.data['cachet']['token']} + self.token = os.environ.get('CACHET_TOKEN') or self.data['cachet']['token'] + self.headers = {'X-Cachet-Token': self.token} self.endpoint_method = self.endpoint['method'] - self.endpoint_url = self.endpoint['url'] - self.endpoint_url = normalize_url(self.endpoint_url) + self.endpoint_url = normalize_url(self.endpoint['url']) self.endpoint_timeout = self.endpoint.get('timeout') or 1 self.endpoint_header = self.endpoint.get('header') or None self.allowed_fails = self.endpoint.get('allowed_fails') or 0 @@ -106,6 +67,8 @@ class Configuration(object): self.component_id = self.endpoint['component_id'] self.metric_id = self.endpoint.get('metric_id') + self.client = CachetClient(self.api_url, self.token) + if self.metric_id is not None: self.default_metric_value = self.get_default_metric_value(self.metric_id) @@ -113,8 +76,9 @@ class Configuration(object): self.latency_unit = self.data['cachet'].get('latency_unit') or 's' # We need the current status so we monitor the status changes. This is necessary for creating incidents. 
- self.status = get_current_status(self.api_url, self.component_id, self.headers) + self.status = self.client.get_component_status(self.component_id) self.previous_status = self.status + self.logger.info(f'Component current status: {self.status}') # Get remaining settings self.public_incidents = int(self.endpoint['public_incidents']) @@ -178,27 +142,27 @@ class Configuration(object): except requests.ConnectionError: self.message = 'The URL is unreachable: %s %s' % (self.endpoint_method, self.endpoint_url) self.logger.warning(self.message) - self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE + self.status = st.ComponentStatus.PARTIAL_OUTAGE return except requests.HTTPError: self.message = 'Unexpected HTTP response' self.logger.exception(self.message) - self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE + self.status = st.ComponentStatus.PARTIAL_OUTAGE return - except requests.Timeout: + except (requests.Timeout, requests.ConnectTimeout): self.message = 'Request timed out' self.logger.warning(self.message) - self.status = st.COMPONENT_STATUS_PERFORMANCE_ISSUES + self.status = st.ComponentStatus.PERFORMANCE_ISSUES return # We initially assume the API is healthy. - self.status = st.COMPONENT_STATUS_OPERATIONAL + self.status: ComponentStatus = st.ComponentStatus.OPERATIONAL self.message = '' for expectation in self.expectations: - status = expectation.get_status(self.request) + status: ComponentStatus = expectation.get_status(self.request) # The greater the status is, the worse the state of the API is. - if status > self.status: + if status.value > self.status.value: self.status = status self.message = expectation.get_message(self.request) self.logger.info(self.message) @@ -220,7 +184,7 @@ class Configuration(object): and only for non-operational ones above the configured threshold (allowed_fails). 
""" - if self.status != 1: + if self.status != st.ComponentStatus.OPERATIONAL: self.current_fails = self.current_fails + 1 self.logger.warning(f'Failure #{self.current_fails} with threshold set to {self.allowed_fails}') if self.current_fails <= self.allowed_fails: @@ -234,27 +198,30 @@ class Configuration(object): status based on the previous call to evaluate(). """ if self.previous_status == self.status: + # We don't want to keep spamming if there's no change in status. + self.logger.info(f'No changes to component status.') + self.trigger_update = False return + self.previous_status = self.status if not self.trigger_update: return - self.api_component_status = get_current_status(self.api_url, self.component_id, self.headers) + api_component_status = self.client.get_component_status(self.component_id) - if self.status == self.api_component_status: + if self.status == api_component_status: return + self.status = api_component_status - params = {'id': self.component_id, 'status': self.status} - component_request = requests.put('%s/components/%d' % (self.api_url, self.component_id), params=params, - headers=self.headers) + component_request = self.client.push_status(self.component_id, self.status) if component_request.ok: # Successful update - self.logger.info('Component update: status [%d]' % (self.status,)) + self.logger.info(f'Component update: status [{self.status}]') else: # Failed to update the API status - self.logger.warning('Component update failed with status [%d]: API' - ' status: [%d]' % (component_request.status_code, self.status)) + self.logger.warning(f'Component update failed with HTTP status: {component_request.status_code}. API' + f' status: {self.status}') def push_metrics(self): """Pushes the total amount of seconds the request took to get a response from the URL. 
@@ -263,16 +230,11 @@ class Configuration(object): """ if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'): # We convert the elapsed time from the request, in seconds, to the configured unit. - value = self.default_metric_value if self.status != 1 else latency_unit.convert_to_unit(self.latency_unit, - self.request.elapsed.total_seconds()) - params = {'id': self.metric_id, 'value': value, - 'timestamp': self.current_timestamp} - metrics_request = requests.post('%s/metrics/%d/points' % (self.api_url, self.metric_id), params=params, - headers=self.headers) - + metrics_request = self.client.push_metrics(self.metric_id, self.latency_unit, + self.request.elapsed.total_seconds(), self.current_timestamp) if metrics_request.ok: # Successful metrics upload - self.logger.info('Metric uploaded: %.6f %s' % (value, self.latency_unit)) + self.logger.info('Metric uploaded: %.6f %s' % (self.request.elapsed.total_seconds(), self.latency_unit)) else: self.logger.warning(f'Metric upload failed with status [{metrics_request.status_code}]') @@ -282,14 +244,10 @@ class Configuration(object): """ if not self.trigger_update: return - if hasattr(self, 'incident_id') and self.status == st.COMPONENT_STATUS_OPERATIONAL: - # If the incident already exists, it means it was unhealthy but now it's healthy again. 
- params = {'status': 4, 'visible': self.public_incidents, 'component_id': self.component_id, - 'component_status': self.status, - 'notify': True} + if hasattr(self, 'incident_id') and self.status == st.ComponentStatus.OPERATIONAL: + incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id, + previous_incident_id=self.incident_id) - incident_request = requests.put(f'{self.api_url}/incidents/{self.incident_id}', params=params, - headers=self.headers) if incident_request.ok: # Successful metrics upload self.logger.info( @@ -298,11 +256,9 @@ class Configuration(object): else: self.logger.warning( f'Incident update failed with status [{incident_request.status_code}], message: "{self.message}"') - elif not hasattr(self, 'incident_id') and self.status != st.COMPONENT_STATUS_OPERATIONAL: - # This is the first time the incident is being created. - params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': self.public_incidents, - 'component_id': self.component_id, 'component_status': self.status, 'notify': True} - incident_request = requests.post(f'{self.api_url}/incidents', params=params, headers=self.headers) + elif not hasattr(self, 'incident_id') and self.status != st.ComponentStatus.OPERATIONAL: + incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id, + message=self.message) if incident_request.ok: # Successful incident upload. 
self.incident_id = incident_request.json()['data']['id'] @@ -338,20 +294,20 @@ class Expectation(object): self.incident_status = self.parse_incident_status(configuration) @abc.abstractmethod - def get_status(self, response): + def get_status(self, response) -> ComponentStatus: """Returns the status of the API, following cachet's component status documentation: https://docs.cachethq.io/docs/component-statuses """ @abc.abstractmethod - def get_message(self, response): + def get_message(self, response) -> str: """Gets the error message.""" @abc.abstractmethod def get_default_incident(self): """Returns the default status when this incident happens.""" - def parse_incident_status(self, configuration): + def parse_incident_status(self, configuration) -> ComponentStatus: return st.INCIDENT_MAP.get(configuration.get('incident', None), self.get_default_incident()) @@ -374,14 +330,14 @@ class HttpStatus(Expectation): # We shouldn't look into more than one value, as this is a range value. return int(statuses[0]), int(statuses[1]) - def get_status(self, response): + def get_status(self, response) -> ComponentStatus: if self.status_range[0] <= response.status_code < self.status_range[1]: - return st.COMPONENT_STATUS_OPERATIONAL + return st.ComponentStatus.OPERATIONAL else: return self.incident_status def get_default_incident(self): - return st.COMPONENT_STATUS_PARTIAL_OUTAGE + return st.ComponentStatus.PARTIAL_OUTAGE def get_message(self, response): return f'Unexpected HTTP status ({response.status_code})' @@ -395,14 +351,14 @@ class Latency(Expectation): self.threshold = configuration['threshold'] super(Latency, self).__init__(configuration) - def get_status(self, response): + def get_status(self, response) -> ComponentStatus: if response.elapsed.total_seconds() <= self.threshold: - return st.COMPONENT_STATUS_OPERATIONAL + return st.ComponentStatus.OPERATIONAL else: return self.incident_status def get_default_incident(self): - return st.COMPONENT_STATUS_PERFORMANCE_ISSUES + 
+ return st.ComponentStatus.PERFORMANCE_ISSUES def get_message(self, response): return 'Latency above threshold: %.4f seconds' % (response.elapsed.total_seconds(),) @@ -417,14 +373,14 @@ class Regex(Expectation): self.regex = re.compile(configuration['regex'], re.UNICODE + re.DOTALL) super(Regex, self).__init__(configuration) - def get_status(self, response): + def get_status(self, response) -> ComponentStatus: if self.regex.match(response.text): - return st.COMPONENT_STATUS_OPERATIONAL + return st.ComponentStatus.OPERATIONAL else: return self.incident_status def get_default_incident(self): - return st.COMPONENT_STATUS_PARTIAL_OUTAGE + return st.ComponentStatus.PARTIAL_OUTAGE def get_message(self, response): return 'Regex did not match anything in the body' diff --git a/cachet_url_monitor/exceptions.py b/cachet_url_monitor/exceptions.py new file mode 100644 index 0000000..9ede85f --- /dev/null +++ b/cachet_url_monitor/exceptions.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +class ComponentNonexistentError(Exception): + """Exception raised when the component does not exist.""" + + def __init__(self, component_id): + self.component_id = component_id + + def __str__(self): + return repr(f'Component with id [{self.component_id}] does not exist.') + + +class MetricNonexistentError(Exception): + """Exception raised when the metric does not exist.""" + + def __init__(self, metric_id): + self.metric_id = metric_id + + def __str__(self): + return repr(f'Metric with id [{self.metric_id}] does not exist.') diff --git a/cachet_url_monitor/scheduler.py b/cachet_url_monitor/scheduler.py index 67b5afc..8171922 100644 --- a/cachet_url_monitor/scheduler.py +++ b/cachet_url_monitor/scheduler.py @@ -40,45 +40,40 @@ class Agent(object): class Decorator(object): + """Defines the actions a user can configure to be executed when there's an incident.""" + def execute(self, configuration): pass class UpdateStatusDecorator(Decorator): + """Updates the component status when an incident 
happens.""" + def execute(self, configuration): configuration.push_status() class CreateIncidentDecorator(Decorator): + """Creates an incident entry on cachet when an incident happens.""" + def execute(self, configuration): configuration.push_incident() class PushMetricsDecorator(Decorator): + """Updates the URL latency metric.""" + def execute(self, configuration): configuration.push_metrics() class Scheduler(object): - def __init__(self, config_file, endpoint_index): + def __init__(self, configuration, agent): self.logger = logging.getLogger('cachet_url_monitor.scheduler.Scheduler') - self.configuration = Configuration(config_file, endpoint_index) - self.agent = self.get_agent() - + self.configuration = configuration + self.agent = agent self.stop = False - def get_agent(self): - action_names = { - 'CREATE_INCIDENT': CreateIncidentDecorator, - 'UPDATE_STATUS': UpdateStatusDecorator, - 'PUSH_METRICS': PushMetricsDecorator, - } - actions = [] - for action in self.configuration.get_action(): - self.logger.info(f'Registering action {action}') - actions.append(action_names[action]()) - return Agent(self.configuration, decorators=actions) - def start(self): self.agent.start() self.logger.info('Starting monitor agent...') @@ -96,6 +91,19 @@ class NewThread(threading.Thread): self.scheduler.start() +def build_agent(configuration, logger): + action_names = { + 'CREATE_INCIDENT': CreateIncidentDecorator, + 'UPDATE_STATUS': UpdateStatusDecorator, + 'PUSH_METRICS': PushMetricsDecorator, + } + actions = [] + for action in configuration.get_action(): + logger.info(f'Registering action {action}') + actions.append(action_names[action]()) + return Agent(configuration, decorators=actions) + + def validate_config(): if 'endpoints' not in config_file.keys(): fatal_error('Endpoints is a mandatory field') @@ -132,4 +140,6 @@ if __name__ == "__main__": validate_config() for endpoint_index in range(len(config_file['endpoints'])): - NewThread(Scheduler(config_file, 
endpoint_index)).start() + configuration = Configuration(config_file, endpoint_index) + NewThread(Scheduler(configuration, + build_agent(configuration, logging.getLogger('cachet_url_monitor.scheduler')))).start() diff --git a/cachet_url_monitor/status.py b/cachet_url_monitor/status.py index 20d4061..f230e02 100644 --- a/cachet_url_monitor/status.py +++ b/cachet_url_monitor/status.py @@ -3,22 +3,30 @@ This file defines all the different status different values. These are all constants and are coupled to cachet's API configuration. """ +from enum import Enum -COMPONENT_STATUS_OPERATIONAL = 1 -COMPONENT_STATUS_PERFORMANCE_ISSUES = 2 -COMPONENT_STATUS_PARTIAL_OUTAGE = 3 -COMPONENT_STATUS_MAJOR_OUTAGE = 4 -COMPONENT_STATUSES = [COMPONENT_STATUS_OPERATIONAL, - COMPONENT_STATUS_PERFORMANCE_ISSUES, COMPONENT_STATUS_PARTIAL_OUTAGE, - COMPONENT_STATUS_MAJOR_OUTAGE] +class ComponentStatus(Enum): + OPERATIONAL = 1 + PERFORMANCE_ISSUES = 2 + PARTIAL_OUTAGE = 3 + MAJOR_OUTAGE = 4 + INCIDENT_PARTIAL = 'PARTIAL' INCIDENT_MAJOR = 'MAJOR' INCIDENT_PERFORMANCE = 'PERFORMANCE' INCIDENT_MAP = { - INCIDENT_PARTIAL: COMPONENT_STATUS_PARTIAL_OUTAGE, - INCIDENT_MAJOR: COMPONENT_STATUS_MAJOR_OUTAGE, - INCIDENT_PERFORMANCE: COMPONENT_STATUS_PERFORMANCE_ISSUES, + INCIDENT_PARTIAL: ComponentStatus.PARTIAL_OUTAGE, + INCIDENT_MAJOR: ComponentStatus.MAJOR_OUTAGE, + INCIDENT_PERFORMANCE: ComponentStatus.PERFORMANCE_ISSUES, } + + +class IncidentStatus(Enum): + SCHEDULED = 0 + INVESTIGATING = 1 + IDENTIFIED = 2 + WATCHING = 3 + FIXED = 4 diff --git a/dev_requirements.txt b/dev_requirements.txt index 86dfa4d..7f56348 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -5,3 +5,4 @@ pudb==2016.1 pytest==5.2.2 pytest-cov==2.8.1 coverage==4.5.2 +requests-mock==1.7.0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 53629cf..0c761a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ PyYAML==5.1.2 requests==2.22.0 schedule==0.6.0 +Click==7.0 
diff --git a/tests/test_client.py b/tests/test_client.py new file mode 100644 index 0000000..4611c7b --- /dev/null +++ b/tests/test_client.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +import unittest +from typing import Dict, List + +import requests_mock + +from cachet_url_monitor.client import CachetClient +from cachet_url_monitor.exceptions import MetricNonexistentError +from cachet_url_monitor.status import ComponentStatus + +TOKEN: str = 'token_123' +CACHET_URL: str = 'http://foo.localhost' +JSON: Dict[str, List[Dict[str, int]]] = {'data': [{'id': 1}]} + + +class ClientTest(unittest.TestCase): + def setUp(self): + self.client = CachetClient('foo.localhost', TOKEN) + + def test_init(self): + self.assertEqual(self.client.headers, {'X-Cachet-Token': TOKEN}, 'Header was not set correctly') + self.assertEqual(self.client.url, CACHET_URL, 'Cachet API URL was set incorrectly') + + @requests_mock.mock() + def test_get_components(self, m): + m.get(f'{CACHET_URL}/components', json=JSON, headers={'X-Cachet-Token': TOKEN}) + components = self.client.get_components() + + self.assertEqual(components, [{'id': 1}], + 'Getting components list is incorrect.') + + @requests_mock.mock() + def test_get_metrics(self, m): + m.get(f'{CACHET_URL}/metrics', json=JSON) + metrics = self.client.get_metrics() + + self.assertEqual(metrics, [{'id': 1}], + 'Getting metrics list is incorrect.') + + @requests_mock.mock() + def test_generate_config(self, m): + def components(): + return { + 'data': [ + { + 'id': '1', + 'name': 'apache', + 'link': 'http://abc.def', + 'enabled': True + }, + { + 'id': '2', + 'name': 'haproxy', + 'link': 'http://ghi.jkl', + 'enabled': False + }, + { + 'id': '3', + 'name': 'nginx', + 'link': 'http://mno.pqr', + 'enabled': True + } + ] + } + + m.get(f'{CACHET_URL}/components', json=components(), headers={'X-Cachet-Token': TOKEN}) + config = self.client.generate_config() + + self.assertEqual(config, { + 'cachet': { + 'api_url': CACHET_URL, + 'token': TOKEN + }, + 
'endpoints': [ + { + 'name': 'apache', + 'url': 'http://abc.def', + 'method': 'GET', + 'timeout': 1, + 'expectation': [ + { + 'type': 'HTTP_STATUS', + 'status_range': '200-300', + 'incident': 'MAJOR' + } + ], + 'allowed_fails': 0, + 'frequency': 30, + 'component_id': '1', + 'action': [ + 'CREATE_INCIDENT', + 'UPDATE_STATUS', + ], + 'public_incidents': True, + }, + { + 'name': 'nginx', + 'url': 'http://mno.pqr', + 'method': 'GET', + 'timeout': 1, + 'expectation': [ + { + 'type': 'HTTP_STATUS', + 'status_range': '200-300', + 'incident': 'MAJOR' + } + ], + 'allowed_fails': 0, + 'frequency': 30, + 'component_id': '3', + 'action': [ + 'CREATE_INCIDENT', + 'UPDATE_STATUS', + ], + 'public_incidents': True, + } + ] + }, 'Generated config is incorrect.') + + @requests_mock.mock() + def test_get_default_metric_value(self, m): + m.get(f'{CACHET_URL}/metrics/123', json={'data': {'default_value': 0.456}}, headers={'X-Cachet-Token': TOKEN}) + default_metric_value = self.client.get_default_metric_value(123) + + self.assertEqual(default_metric_value, 0.456, + 'Getting default metric value is incorrect.') + + @requests_mock.mock() + def test_get_default_metric_value_invalid_id(self, m): + m.get(f'{CACHET_URL}/metrics/123', headers={'X-Cachet-Token': TOKEN}, status_code=400) + with self.assertRaises(MetricNonexistentError): + self.client.get_default_metric_value(123) + + @requests_mock.mock() + def test_get_component_status(self, m): + def json(): + return { + 'data': { + 'status': ComponentStatus.OPERATIONAL.value + } + } + + m.get(f'{CACHET_URL}/components/123', json=json(), headers={'X-Cachet-Token': TOKEN}) + status = self.client.get_component_status(123) + + self.assertEqual(status, ComponentStatus.OPERATIONAL, + 'Getting component status value is incorrect.') diff --git a/tests/test_configuration.py b/tests/test_configuration.py index 8dd65cc..2888fa8 100644 --- a/tests/test_configuration.py +++ b/tests/test_configuration.py @@ -4,12 +4,12 @@ import unittest import mock import 
pytest -from requests import ConnectionError, HTTPError, Timeout +import requests +import requests_mock from yaml import load, SafeLoader import cachet_url_monitor.status -sys.modules['requests'] = mock.Mock() sys.modules['logging'] = mock.Mock() from cachet_url_monitor.configuration import Configuration import os @@ -24,20 +24,20 @@ class ConfigurationTest(unittest.TestCase): sys.modules['logging'].getLogger = getLogger - def get(url, headers): - get_return = mock.Mock() - get_return.ok = True - get_return.json = mock.Mock() - get_return.json.return_value = {'data': {'status': 1, 'default_value': 0.5}} - return get_return - - sys.modules['requests'].get = get + # def get(url, headers): + # get_return = mock.Mock() + # get_return.ok = True + # get_return.json = mock.Mock() + # get_return.json.return_value = {'data': {'status': 1, 'default_value': 0.5}} + # return get_return + # + # sys.modules['requests'].get = get self.configuration = Configuration( load(open(os.path.join(os.path.dirname(__file__), 'configs/config.yml'), 'rt'), SafeLoader), 0) - sys.modules['requests'].Timeout = Timeout - sys.modules['requests'].ConnectionError = ConnectionError - sys.modules['requests'].HTTPError = HTTPError + # sys.modules['requests'].Timeout = Timeout + # sys.modules['requests'].ConnectionError = ConnectionError + # sys.modules['requests'].HTTPError = HTTPError def test_init(self): self.assertEqual(len(self.configuration.data), 2, 'Number of root elements in config.yml is incorrect') @@ -47,133 +47,69 @@ class ConfigurationTest(unittest.TestCase): 'Cachet API URL was set incorrectly') self.assertDictEqual(self.configuration.endpoint_header, {'SOME-HEADER': 'SOME-VALUE'}, 'Header is incorrect') - def test_evaluate(self): - def total_seconds(): - return 0.1 - - def request(method, url, headers, timeout=None): - response = mock.Mock() - response.status_code = 200 - response.elapsed = mock.Mock() - response.elapsed.total_seconds = total_seconds - response.text = '' - return 
response - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate(self, m): + m.get('http://localhost:8080/swagger', text='') self.configuration.evaluate() - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.OPERATIONAL, 'Component status set incorrectly') - def test_evaluate_without_header(self): - def total_seconds(): - return 0.1 - - def request(method, url, headers=None, timeout=None): - response = mock.Mock() - response.status_code = 200 - response.elapsed = mock.Mock() - response.elapsed.total_seconds = total_seconds - response.text = '' - return response - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate_without_header(self, m): + m.get('http://localhost:8080/swagger', text='') self.configuration.evaluate() - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.OPERATIONAL, 'Component status set incorrectly') - def test_evaluate_with_failure(self): - def total_seconds(): - return 0.1 - - def request(method, url, headers, timeout=None): - response = mock.Mock() - # We are expecting a 200 response, so this will fail the expectation. 
- response.status_code = 400 - response.elapsed = mock.Mock() - response.elapsed.total_seconds = total_seconds - response.text = '' - return response - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate_with_failure(self, m): + m.get('http://localhost:8080/swagger', text='', status_code=400) self.configuration.evaluate() - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_MAJOR_OUTAGE, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.MAJOR_OUTAGE, 'Component status set incorrectly or custom incident status is incorrectly parsed') - def test_evaluate_with_timeout(self): - def request(method, url, headers, timeout=None): - self.assertEqual(method, 'GET', 'Incorrect HTTP method') - self.assertEqual(url, 'http://localhost:8080/swagger', 'Monitored URL is incorrect') - self.assertEqual(timeout, 0.010) - - raise Timeout() - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate_with_timeout(self, m): + m.get('http://localhost:8080/swagger', exc=requests.Timeout) self.configuration.evaluate() - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_PERFORMANCE_ISSUES, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.PERFORMANCE_ISSUES, 'Component status set incorrectly') self.mock_logger.warning.assert_called_with('Request timed out') - def test_evaluate_with_connection_error(self): - def request(method, url, headers, timeout=None): - self.assertEqual(method, 'GET', 'Incorrect HTTP method') - self.assertEqual(url, 'http://localhost:8080/swagger', 'Monitored URL is incorrect') - self.assertEqual(timeout, 0.010) - - raise ConnectionError() - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate_with_connection_error(self, m): + m.get('http://localhost:8080/swagger', exc=requests.ConnectionError) self.configuration.evaluate() - 
self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE, 'Component status set incorrectly') self.mock_logger.warning.assert_called_with('The URL is unreachable: GET http://localhost:8080/swagger') - def test_evaluate_with_http_error(self): - def request(method, url, headers, timeout=None): - self.assertEqual(method, 'GET', 'Incorrect HTTP method') - self.assertEqual(url, 'http://localhost:8080/swagger', 'Monitored URL is incorrect') - self.assertEqual(timeout, 0.010) - - raise HTTPError() - - sys.modules['requests'].request = request + @requests_mock.mock() + def test_evaluate_with_http_error(self, m): + m.get('http://localhost:8080/swagger', exc=requests.HTTPError) self.configuration.evaluate() - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE, + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.PARTIAL_OUTAGE, 'Component status set incorrectly') self.mock_logger.exception.assert_called_with('Unexpected HTTP response') - def test_push_status(self): - def put(url, params=None, headers=None): - self.assertEqual(url, 'https://demo.cachethq.io/api/v1/components/1', 'Incorrect cachet API URL') - self.assertDictEqual(params, {'id': 1, 'status': 1}, 'Incorrect component update parameters') - self.assertDictEqual(headers, {'X-Cachet-Token': 'token2'}, 'Incorrect component update parameters') - - response = mock.Mock() - response.status_code = 200 - return response - - sys.modules['requests'].put = put - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL, + @requests_mock.mock() + def test_push_status(self, m): + m.put('https://demo.cachethq.io/api/v1/components/1?id=1&status=1', headers={'X-Cachet-Token': 'token2'}) + self.assertEqual(self.configuration.status, 
cachet_url_monitor.status.ComponentStatus.OPERATIONAL, 'Incorrect component update parameters') self.configuration.push_status() - def test_push_status_with_failure(self): - def put(url, params=None, headers=None): - self.assertEqual(url, 'https://demo.cachethq.io/api/v1/components/1', 'Incorrect cachet API URL') - self.assertDictEqual(params, {'id': 1, 'status': 1}, 'Incorrect component update parameters') - self.assertDictEqual(headers, {'X-Cachet-Token': 'token2'}, 'Incorrect component update parameters') - - response = mock.Mock() - response.status_code = 400 - return response - - sys.modules['requests'].put = put - self.assertEqual(self.configuration.status, cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL, + @requests_mock.mock() + def test_push_status_with_failure(self, m): + m.put('https://demo.cachethq.io/api/v1/components/1?id=1&status=1', headers={'X-Cachet-Token': 'token2'}, + status_code=400) + self.assertEqual(self.configuration.status, cachet_url_monitor.status.ComponentStatus.OPERATIONAL, 'Incorrect component update parameters') self.configuration.push_status() @@ -181,21 +117,6 @@ class ConfigurationTest(unittest.TestCase): class ConfigurationMultipleUrlTest(unittest.TestCase): @mock.patch.dict(os.environ, {'CACHET_TOKEN': 'token2'}) def setUp(self): - def getLogger(name): - self.mock_logger = mock.Mock() - return self.mock_logger - - sys.modules['logging'].getLogger = getLogger - - def get(url, headers): - get_return = mock.Mock() - get_return.ok = True - get_return.json = mock.Mock() - get_return.json.return_value = {'data': {'status': 1, 'default_value': 0.5}} - return get_return - - sys.modules['requests'].get = get - config_yaml = load(open(os.path.join(os.path.dirname(__file__), 'configs/config_multiple_urls.yml'), 'rt'), SafeLoader) self.configuration = [] @@ -203,10 +124,6 @@ class ConfigurationMultipleUrlTest(unittest.TestCase): for index in range(len(config_yaml['endpoints'])): self.configuration.append(Configuration(config_yaml, 
index)) - sys.modules['requests'].Timeout = Timeout - sys.modules['requests'].ConnectionError = ConnectionError - sys.modules['requests'].HTTPError = HTTPError - def test_init(self): expected_method = ['GET', 'POST'] expected_url = ['http://localhost:8080/swagger', 'http://localhost:8080/bar'] diff --git a/tests/test_expectation.py b/tests/test_expectation.py index 6f8fd8f..8d29c8f 100644 --- a/tests/test_expectation.py +++ b/tests/test_expectation.py @@ -7,6 +7,7 @@ import pytest from cachet_url_monitor.configuration import HttpStatus, Regex from cachet_url_monitor.configuration import Latency +from cachet_url_monitor.status import ComponentStatus class LatencyTest(unittest.TestCase): @@ -25,7 +26,7 @@ class LatencyTest(unittest.TestCase): request.elapsed = elapsed elapsed.total_seconds = total_seconds - assert self.expectation.get_status(request) == 1 + assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL def test_get_status_unhealthy(self): def total_seconds(): @@ -36,7 +37,7 @@ class LatencyTest(unittest.TestCase): request.elapsed = elapsed elapsed.total_seconds = total_seconds - assert self.expectation.get_status(request) == 2 + assert self.expectation.get_status(request) == ComponentStatus.PERFORMANCE_ISSUES def test_get_message(self): def total_seconds(): @@ -73,13 +74,13 @@ class HttpStatusTest(unittest.TestCase): request = mock.Mock() request.status_code = 200 - assert self.expectation.get_status(request) == 1 + assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL def test_get_status_unhealthy(self): request = mock.Mock() request.status_code = 400 - assert self.expectation.get_status(request) == 3 + assert self.expectation.get_status(request) == ComponentStatus.PARTIAL_OUTAGE def test_get_message(self): request = mock.Mock() @@ -100,13 +101,13 @@ class RegexTest(unittest.TestCase): request = mock.Mock() request.text = 'We could find stuff\n in this body.' 
- assert self.expectation.get_status(request) == 1 + assert self.expectation.get_status(request) == ComponentStatus.OPERATIONAL def test_get_status_unhealthy(self): request = mock.Mock() request.text = 'We will not find it here' - assert self.expectation.get_status(request) == 3 + assert self.expectation.get_status(request) == ComponentStatus.PARTIAL_OUTAGE def test_get_message(self): request = mock.Mock() diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index cef318e..4b17d93 100644 --- a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -3,7 +3,6 @@ import sys import unittest import mock -from yaml import load, SafeLoader sys.modules['schedule'] = mock.Mock() from cachet_url_monitor.scheduler import Agent, Scheduler @@ -46,13 +45,43 @@ class SchedulerTest(unittest.TestCase): mock_requests.get = get - self.scheduler = Scheduler(load(open('config.yml', 'r'), SafeLoader), 0) + self.agent = mock.MagicMock() + + self.scheduler = Scheduler( + { + 'endpoints': [ + { + 'name': 'foo', + 'url': 'http://localhost:8080/swagger', + 'method': 'GET', + 'expectation': [ + { + 'type': 'HTTP_STATUS', + 'status_range': '200 - 300', + 'incident': 'MAJOR', + } + ], + 'allowed_fails': 0, + 'component_id': 1, + 'action': ['CREATE_INCIDENT', 'UPDATE_STATUS'], + 'public_incidents': True, + 'latency_unit': 'ms', + 'frequency': 30 + } + ], + 'cachet': { + 'api_url': 'https: // demo.cachethq.io / api / v1', + 'token': 'my_token' + } + }, self.agent) def test_init(self): - assert self.scheduler.stop == False + self.assertFalse(self.scheduler.stop) def test_start(self): # TODO(mtakaki|2016-05-01): We need a better way of testing this method. # Leaving it as a placeholder. self.scheduler.stop = True self.scheduler.start() + + self.agent.start.assert_called()