mirror of
https://github.com/mtan93/cachet-url-monitor.git
synced 2026-03-08 05:31:58 +00:00
Initial attempt at creating incidents when a URL becomes unhealthy. It is not yet called from the scheduler. #3
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,3 +10,4 @@ share/
|
||||
*.egg-info
|
||||
MANIFEST
|
||||
dist/
|
||||
.idea
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python
|
||||
import abc
|
||||
import cachet_url_monitor.status
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
import time
|
||||
from yaml import load
|
||||
|
||||
|
||||
# These are the mandatory fields that must be in the configuration file in this
|
||||
# same exact structure.
|
||||
configuration_mandatory_fields = {
|
||||
@@ -17,6 +17,7 @@ configuration_mandatory_fields = {
|
||||
|
||||
class ConfigurationValidationError(Exception):
|
||||
"""Exception raised when there's a validation error."""
|
||||
|
||||
def __init__(self, value):
|
||||
self.value = value
|
||||
|
||||
@@ -28,6 +29,7 @@ class Configuration(object):
|
||||
"""Represents a configuration file, but it also includes the functionality
|
||||
of assessing the API and pushing the results to cachet.
|
||||
"""
|
||||
|
||||
def __init__(self, config_file):
|
||||
# TODO(mtakaki#1|2016-04-28): Accept overriding settings using environment
|
||||
# variables so we have a more docker-friendly approach.
|
||||
@@ -43,9 +45,20 @@ class Configuration(object):
|
||||
in self.data['endpoint']['expectation']]
|
||||
for expectation in self.expectations:
|
||||
self.logger.info('Registered expectation: %s' % (expectation,))
|
||||
|
||||
self.headers = {'X-Cachet-Token': self.data['cachet']['token']}
|
||||
|
||||
def is_create_incident(self):
    """Tells whether the configuration asks for incidents to be created.

    The optional 'create_incident' key is read from the 'cachet' section of
    the loaded configuration. A missing key counts as disabled.

    :return True if the configuration is set to create incidents, False otherwise.
    """
    # Coerce to bool so callers always get True/False, as documented, rather
    # than whatever raw value the configuration file happened to contain.
    return bool(self.data['cachet'].get('create_incident', False))
|
||||
|
||||
def validate(self):
|
||||
"""Validates the configuration by verifying the mandatory fields are
|
||||
present and in the correct format. If the validation fails, a
|
||||
ConfigurationValidationError is raised. Otherwise nothing will happen.
|
||||
"""
|
||||
configuration_errors = []
|
||||
for key, sub_entries in configuration_mandatory_fields.iteritems():
|
||||
if key not in self.data:
|
||||
@@ -63,8 +76,8 @@ class Configuration(object):
|
||||
configuration_errors.append('endpoint.expectation')
|
||||
|
||||
if len(configuration_errors) > 0:
|
||||
raise ConfigurationValidationError(('Config file [%s] failed '
|
||||
'validation. Missing keys: %s') % (self.config_file,
|
||||
raise ConfigurationValidationError(
|
||||
'Config file [%s] failed validation. Missing keys: %s' % (self.config_file,
|
||||
', '.join(configuration_errors)))
|
||||
|
||||
def evaluate(self):
|
||||
@@ -72,36 +85,47 @@ class Configuration(object):
|
||||
each one of the expectations, one by one. The status will be updated
|
||||
according to the expectation results.
|
||||
"""
|
||||
if hasattr(self, 'status'):
|
||||
# Keeping track of the previous status.
|
||||
self.previous_status = self.status
|
||||
|
||||
try:
|
||||
self.request = requests.request(self.data['endpoint']['method'],
|
||||
self.data['endpoint']['url'],
|
||||
timeout=self.data['endpoint']['timeout'])
|
||||
self.current_timestamp = int(time.time())
|
||||
except requests.ConnectionError:
|
||||
self.logger.warning('The URL is unreachable: %s %s' %
|
||||
(self.data['endpoint']['method'],
|
||||
self.data['endpoint']['url']))
|
||||
self.status = 3
|
||||
self.message = 'The URL is unreachable: %s %s' % (
|
||||
self.data['endpoint']['method'], self.data['endpoint']['url'])
|
||||
self.logger.warning(self.message)
|
||||
self.status = cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
return
|
||||
except requests.HTTPError:
|
||||
self.logger.exception('Unexpected HTTP response')
|
||||
self.status = 3
|
||||
self.message = 'Unexpected HTTP response'
|
||||
self.logger.exception(self.message)
|
||||
self.status = cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
return
|
||||
except requests.Timeout:
|
||||
self.logger.warning('Request timed out')
|
||||
self.status = 3
|
||||
self.message = 'Request timed out'
|
||||
self.logger.warning(self.message)
|
||||
self.status = cachet_url_monitor.status.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
||||
return
|
||||
|
||||
# We initially assume the API is healthy.
|
||||
self.status = 1
|
||||
self.status = cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
|
||||
self.message = ''
|
||||
for expectation in self.expectations:
|
||||
status = expectation.get_status(self.request)
|
||||
|
||||
# The greater the status is, the worse the state of the API is.
|
||||
if status > self.status:
|
||||
self.status = status
|
||||
self.message = expectation.get_message(self.request)
|
||||
|
||||
def push_status(self):
|
||||
"""Pushes the status of the component to the cachet server. It will update the component
|
||||
status based on the previous call to evaluate().
|
||||
"""
|
||||
params = {'id': self.data['cachet']['component_id'], 'status':
|
||||
self.status}
|
||||
component_request = requests.put('%s/components/%d' %
|
||||
@@ -117,6 +141,9 @@ class Configuration(object):
|
||||
' status: [%d]' % (component_request.status_code, self.status))
|
||||
|
||||
def push_metrics(self):
|
||||
"""Pushes the total amount of seconds the request took to get a response from the URL.
|
||||
It will only send a request if the metric id was set in the configuration.
|
||||
"""
|
||||
if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'):
|
||||
params = {'id': self.data['cachet']['metric_id'], 'value':
|
||||
self.request.elapsed.total_seconds(), 'timestamp':
|
||||
@@ -134,11 +161,47 @@ class Configuration(object):
|
||||
self.logger.warning('Metric upload failed with status [%d]' %
|
||||
(metrics_request.status_code,))
|
||||
|
||||
def push_incident(self):
    """Creates or updates a cachet incident based on the previous call to evaluate().

    When no incident is being tracked (self.incident_id is not set) and the
    status is not operational, a new incident is posted and the id from the
    response is stored in self.incident_id. When an incident is being tracked
    and the status is operational again, that incident is updated and
    self.incident_id is removed. In any other combination nothing is sent.
    """
    healthy = self.status == cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
    tracking_incident = hasattr(self, 'incident_id')

    if tracking_incident and healthy:
        # If the incident already exists, it means it was unhealthy. We only update it when it becomes healthy
        # again.
        # NOTE(review): incident status 4 is presumably cachet's "fixed" state - confirm against the cachet API.
        params = {'status': 4, 'visible': 1, 'component_id': self.data['cachet']['component_id'],
                  'component_status': self.status, 'notify': True}

        incident_request = requests.put('%s/incidents/%d' % (self.data['cachet']['api_url'], self.incident_id),
                                        params=params, headers=self.headers)
        if incident_request.ok:
            # Successful incident update. The id is dropped so that a later failure opens a new incident.
            self.logger.info(
                'Incident updated: component status [%d], message: "%s"' % (self.status, self.message))
            del self.incident_id
        else:
            self.logger.warning(
                'Incident update failed with status [%d], message: "%s"' % (
                    incident_request.status_code, self.message))
    elif not tracking_incident and not healthy:
        # This is the first time the incident is being created.
        # NOTE(review): incident status 1 is presumably cachet's "investigating" state - confirm against the
        # cachet API.
        params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': 1,
                  'component_id': self.data['cachet']['component_id'], 'component_status': self.status,
                  'notify': True}
        incident_request = requests.post('%s/incidents' % (self.data['cachet']['api_url'],), params=params,
                                         headers=self.headers)
        if incident_request.ok:
            # Successful incident upload. Keep the id so the incident can be updated later.
            self.incident_id = incident_request.json()['data']['id']
            self.logger.info(
                'Incident uploaded, API unhealthy: component status [%d], message: "%s"' % (
                    self.status, self.message))
        else:
            self.logger.warning(
                'Incident upload failed with status [%d], message: "%s"' % (
                    incident_request.status_code, self.message))
|
||||
|
||||
|
||||
class Expectaction(object):
|
||||
"""Base class for URL result expectations. Any new expectation should extend
|
||||
this class and the name added to create() method.
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def create(configuration):
|
||||
"""Creates a list of expectations based on the configuration types
|
||||
@@ -168,9 +231,9 @@ class HttpStatus(Expectaction):
|
||||
|
||||
def get_status(self, response):
    """Compares the status code of the response with the expected one.

    :param response: object exposing a status_code attribute.
    :return COMPONENT_STATUS_OPERATIONAL when the status code equals self.status,
    COMPONENT_STATUS_PARTIAL_OUTAGE otherwise.
    """
    # Only the named constants are returned; the superseded numeric returns
    # (1 and 3) carried the same values and were unreachable duplicates.
    if response.status_code == self.status:
        return cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
    return cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
|
||||
def get_message(self, response):
    """Builds the failure message, reporting the status code that was received."""
    received_code = response.status_code
    return 'Unexpected HTTP status (%s)' % (received_code,)
|
||||
@@ -185,9 +248,9 @@ class Latency(Expectaction):
|
||||
|
||||
def get_status(self, response):
    """Compares how long the response took with the configured threshold.

    :param response: object whose elapsed attribute exposes total_seconds().
    :return COMPONENT_STATUS_OPERATIONAL when the elapsed seconds are at most self.threshold,
    COMPONENT_STATUS_PERFORMANCE_ISSUES otherwise.
    """
    # Only the named constants are returned; the superseded numeric returns
    # (1 and 2) carried the same values and were unreachable duplicates.
    if response.elapsed.total_seconds() <= self.threshold:
        return cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
    return cachet_url_monitor.status.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
||||
|
||||
def get_message(self, response):
    """Builds the failure message, reporting the elapsed seconds with four decimals."""
    elapsed_seconds = response.elapsed.total_seconds()
    return 'Latency above threshold: %.4f' % (elapsed_seconds,)
|
||||
@@ -199,13 +262,13 @@ class Latency(Expectaction):
|
||||
class Regex(Expectaction):
    """Expectation that checks the response body against a regular expression."""

    def __init__(self, configuration):
        """Compiles the pattern stored under the 'regex' key of the configuration.

        :param configuration: mapping holding the 'regex' pattern string.
        """
        self.regex_string = configuration['regex']
        # DOTALL lets '.' match newlines so the pattern can span a multi-line
        # body. Flags are bit masks, so they are combined with '|'.
        self.regex = re.compile(configuration['regex'], re.UNICODE | re.DOTALL)

    def get_status(self, response):
        """Matches the compiled pattern against the text of the response.

        :param response: object exposing the body as a text attribute.
        :return COMPONENT_STATUS_OPERATIONAL when the pattern matches,
        COMPONENT_STATUS_PARTIAL_OUTAGE otherwise.
        """
        # match() anchors at the beginning of the body, so patterns that should
        # hit anywhere need a leading '.*'.
        if self.regex.match(response.text):
            return cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
        return cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE

    def get_message(self, response):
        """Returns the fixed failure message; the response is not inspected."""
        return 'Regex did not match anything in the body'
|
||||
|
||||
10
cachet_url_monitor/status.py
Normal file
10
cachet_url_monitor/status.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Numeric component status codes. A greater value means a worse state.
(COMPONENT_STATUS_OPERATIONAL,
 COMPONENT_STATUS_PERFORMANCE_ISSUES,
 COMPONENT_STATUS_PARTIAL_OUTAGE,
 COMPONENT_STATUS_MAJOR_OUTAGE) = (1, 2, 3, 4)

# Every known status code, in ascending order.
COMPONENT_STATUSES = [
    COMPONENT_STATUS_OPERATIONAL,
    COMPONENT_STATUS_PERFORMANCE_ISSUES,
    COMPONENT_STATUS_PARTIAL_OUTAGE,
    COMPONENT_STATUS_MAJOR_OUTAGE,
]
|
||||
@@ -8,10 +8,11 @@ endpoint:
|
||||
- type: LATENCY
|
||||
threshold: 1
|
||||
- type: REGEX
|
||||
regex: '.*<body>.*'
|
||||
regex: '.*(<body).*'
|
||||
cachet:
|
||||
api_url: https://demo.cachethq.io/api/v1
|
||||
token: my_token
|
||||
component_id: 1
|
||||
#metric_id: 1
|
||||
create_incident: true
|
||||
frequency: 30
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
import cachet_url_monitor.status
|
||||
import mock
|
||||
import unittest
|
||||
import sys
|
||||
@@ -37,7 +38,7 @@ class ConfigurationTest(unittest.TestCase):
|
||||
sys.modules['requests'].request = request
|
||||
self.configuration.evaluate()
|
||||
|
||||
assert self.configuration.status == 1
|
||||
assert self.configuration.status == cachet_url_monitor.status.COMPONENT_STATUS_OPERATIONAL
|
||||
|
||||
def test_evaluate_with_failure(self):
|
||||
def total_seconds():
|
||||
@@ -54,7 +55,7 @@ class ConfigurationTest(unittest.TestCase):
|
||||
sys.modules['requests'].request = request
|
||||
self.configuration.evaluate()
|
||||
|
||||
assert self.configuration.status == 3
|
||||
assert self.configuration.status == cachet_url_monitor.status.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
|
||||
def test_evaluate_with_timeout(self):
|
||||
def request(method, url, timeout=None):
|
||||
@@ -67,7 +68,7 @@ class ConfigurationTest(unittest.TestCase):
|
||||
sys.modules['requests'].request = request
|
||||
self.configuration.evaluate()
|
||||
|
||||
assert self.configuration.status == 3
|
||||
assert self.configuration.status == cachet_url_monitor.status.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
||||
self.mock_logger.warning.assert_called_with('Request timed out')
|
||||
|
||||
def test_evaluate_with_connection_error(self):
|
||||
|
||||
@@ -77,11 +77,11 @@ class RegexTest(unittest.TestCase):
|
||||
self.expectation = Regex({'type': 'REGEX', 'regex': '.*(find stuff).*'})
|
||||
|
||||
def test_init(self):
|
||||
assert self.expectation.regex == re.compile('.*(find stuff).*')
|
||||
assert self.expectation.regex == re.compile('.*(find stuff).*', re.UNICODE + re.DOTALL)
|
||||
|
||||
def test_get_status_healthy(self):
|
||||
request = mock.Mock()
|
||||
request.text = 'We could find stuff in this body.'
|
||||
request.text = 'We could find stuff\n in this body.'
|
||||
|
||||
assert self.expectation.get_status(request) == 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user