mirror of
https://github.com/mtan93/cachet-url-monitor.git
synced 2026-03-08 05:31:58 +00:00
#38 - Adding support to milliseconds and different units for latency and changing http status to a range, instead of a single value.
This commit is contained in:
@@ -23,7 +23,7 @@ endpoint:
|
|||||||
timeout: 1 # seconds
|
timeout: 1 # seconds
|
||||||
expectation:
|
expectation:
|
||||||
- type: HTTP_STATUS
|
- type: HTTP_STATUS
|
||||||
status: 200
|
status_range: 200-300
|
||||||
- type: LATENCY
|
- type: LATENCY
|
||||||
threshold: 1
|
threshold: 1
|
||||||
- type: REGEX
|
- type: REGEX
|
||||||
@@ -38,6 +38,7 @@ cachet:
|
|||||||
- UPDATE_STATUS
|
- UPDATE_STATUS
|
||||||
public_incidents: true
|
public_incidents: true
|
||||||
frequency: 30
|
frequency: 30
|
||||||
|
latency_unit: ms
|
||||||
```
|
```
|
||||||
|
|
||||||
- **endpoint**, the configuration about the URL that will be monitored.
|
- **endpoint**, the configuration about the URL that will be monitored.
|
||||||
@@ -45,7 +46,7 @@ frequency: 30
|
|||||||
- **method**, the HTTP method that will be used by the monitor.
|
- **method**, the HTTP method that will be used by the monitor.
|
||||||
- **timeout**, how long we'll wait before considering the request failed. The unit is seconds.
|
- **timeout**, how long we'll wait before considering the request failed. The unit is seconds.
|
||||||
- **expectation**, the list of expectations set for the URL.
|
- **expectation**, the list of expectations set for the URL.
|
||||||
- **HTTP_STATUS**, we will verify if the response status code matches what we expect.
|
- **HTTP_STATUS**, we will verify if the response status code falls into the expected range. Please keep in mind the range is inclusive on the first number and exclusive on the second number. If just one value is specified, only that exact status is accepted; for example, `200` will be converted to `200-201`.
|
||||||
- **LATENCY**, we measure how long the request took to get a response and fail if it's above the threshold. The unit is in seconds.
|
- **LATENCY**, we measure how long the request took to get a response and fail if it's above the threshold. The unit is in seconds.
|
||||||
- **REGEX**, we verify if the response body matches the given regex.
|
- **REGEX**, we verify if the response body matches the given regex.
|
||||||
- **cachet**, these are the settings for our cachet server.
|
- **cachet**, these are the settings for our cachet server.
|
||||||
@@ -58,6 +59,7 @@ frequency: 30
|
|||||||
- **UPDATE_STATUS**, updates the component status
|
- **UPDATE_STATUS**, updates the component status
|
||||||
- **public_incidents**, boolean to decide if created incidents should be visible to everyone or only to logged in users. Important only if `CREATE_INCIDENT` or `UPDATE_STATUS` are set.
|
- **public_incidents**, boolean to decide if created incidents should be visible to everyone or only to logged in users. Important only if `CREATE_INCIDENT` or `UPDATE_STATUS` are set.
|
||||||
- **frequency**, how often we'll send a request to the given URL. The unit is in seconds.
|
- **frequency**, how often we'll send a request to the given URL. The unit is in seconds.
|
||||||
|
- **latency_unit**, the latency unit used when reporting the metrics. The measured request latency is automatically converted from seconds to the specified unit. It's not mandatory and it will default to **seconds**. Available units: `ms`, `s`, `m`, `h`.
|
||||||
|
|
||||||
## Setting up
|
## Setting up
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ import requests
|
|||||||
from yaml import dump
|
from yaml import dump
|
||||||
from yaml import load
|
from yaml import load
|
||||||
|
|
||||||
|
import latency_unit
|
||||||
import status as st
|
import status as st
|
||||||
|
|
||||||
# These are the mandatory fields that must be in the configuration file in this
|
# These are the mandatory fields that must be in the configuration file in this
|
||||||
@@ -89,12 +90,15 @@ class Configuration(object):
|
|||||||
self.component_id = os.environ.get('CACHET_COMPONENT_ID') or self.data['cachet']['component_id']
|
self.component_id = os.environ.get('CACHET_COMPONENT_ID') or self.data['cachet']['component_id']
|
||||||
self.metric_id = os.environ.get('CACHET_METRIC_ID') or self.data['cachet'].get('metric_id')
|
self.metric_id = os.environ.get('CACHET_METRIC_ID') or self.data['cachet'].get('metric_id')
|
||||||
self.default_metric_value = self.get_default_metric_value()
|
self.default_metric_value = self.get_default_metric_value()
|
||||||
|
# The latency_unit configuration is not mandatory; we fall back to seconds by default.
|
||||||
|
self.latency_unit = os.environ.get('LATENCY_UNIT') or self.data['cachet'].get('latency_unit') or 's'
|
||||||
|
|
||||||
# We need the current status so we monitor the status changes. This is necessary for creating incidents.
|
# We need the current status so we monitor the status changes. This is necessary for creating incidents.
|
||||||
self.status = get_current_status(self.api_url, self.component_id, self.headers)
|
self.status = get_current_status(self.api_url, self.component_id, self.headers)
|
||||||
|
|
||||||
# Get remaining settings
|
# Get remaining settings
|
||||||
self.public_incidents = int(os.environ.get('CACHET_PUBLIC_INCIDENTS') or self.data['cachet']['public_incidents'])
|
self.public_incidents = int(
|
||||||
|
os.environ.get('CACHET_PUBLIC_INCIDENTS') or self.data['cachet']['public_incidents'])
|
||||||
|
|
||||||
self.logger.info('Monitoring URL: %s %s' % (self.endpoint_method, self.endpoint_url))
|
self.logger.info('Monitoring URL: %s %s' % (self.endpoint_method, self.endpoint_url))
|
||||||
self.expectations = [Expectaction.create(expectation) for expectation in self.data['endpoint']['expectation']]
|
self.expectations = [Expectaction.create(expectation) for expectation in self.data['endpoint']['expectation']]
|
||||||
@@ -207,7 +211,9 @@ class Configuration(object):
|
|||||||
In case of a failed connection attempt, pushes the default metric value.
|
In case of a failed connection attempt, pushes the default metric value.
|
||||||
"""
|
"""
|
||||||
if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'):
|
if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'):
|
||||||
value = self.default_metric_value if self.status != 1 else self.request.elapsed.total_seconds()
|
# We convert the elapsed time from the request, in seconds, to the configured unit.
|
||||||
|
value = self.default_metric_value if self.status != 1 else latency_unit.convert_to_unit(self.latency_unit,
|
||||||
|
self.request.elapsed.total_seconds())
|
||||||
params = {'id': self.metric_id, 'value': value,
|
params = {'id': self.metric_id, 'value': value,
|
||||||
'timestamp': self.current_timestamp}
|
'timestamp': self.current_timestamp}
|
||||||
metrics_request = requests.post('%s/metrics/%d/points' % (self.api_url, self.metric_id), params=params,
|
metrics_request = requests.post('%s/metrics/%d/points' % (self.api_url, self.metric_id), params=params,
|
||||||
@@ -226,7 +232,8 @@ class Configuration(object):
|
|||||||
"""
|
"""
|
||||||
if hasattr(self, 'incident_id') and self.status == st.COMPONENT_STATUS_OPERATIONAL:
|
if hasattr(self, 'incident_id') and self.status == st.COMPONENT_STATUS_OPERATIONAL:
|
||||||
# If the incident already exists, it means it was unhealthy but now it's healthy again.
|
# If the incident already exists, it means it was unhealthy but now it's healthy again.
|
||||||
params = {'status': 4, 'visible': self.public_incidents, 'component_id': self.component_id, 'component_status': self.status,
|
params = {'status': 4, 'visible': self.public_incidents, 'component_id': self.component_id,
|
||||||
|
'component_status': self.status,
|
||||||
'notify': True}
|
'notify': True}
|
||||||
|
|
||||||
incident_request = requests.put('%s/incidents/%d' % (self.api_url, self.incident_id), params=params,
|
incident_request = requests.put('%s/incidents/%d' % (self.api_url, self.incident_id), params=params,
|
||||||
@@ -287,10 +294,19 @@ class Expectaction(object):
|
|||||||
|
|
||||||
class HttpStatus(Expectaction):
|
class HttpStatus(Expectaction):
|
||||||
def __init__(self, configuration):
|
def __init__(self, configuration):
|
||||||
self.status = configuration['status']
|
self.status_range = self.parse_range(configuration['status_range'])
|
||||||
|
|
||||||
|
def parse_range(self, range_string):
|
||||||
|
statuses = range_string.split("-")
|
||||||
|
if len(statuses) == 1:
|
||||||
|
# When there was no range given, we should treat the first number as a single status check.
|
||||||
|
return (int(statuses[0]), int(statuses[0]) + 1)
|
||||||
|
else:
|
||||||
|
# Only the first two values are used as the bounds of the range; any extra values are ignored.
|
||||||
|
return (int(statuses[0]), int(statuses[1]))
|
||||||
|
|
||||||
def get_status(self, response):
|
def get_status(self, response):
|
||||||
if response.status_code == self.status:
|
if response.status_code >= self.status_range[0] and response.status_code < self.status_range[1]:
|
||||||
return st.COMPONENT_STATUS_OPERATIONAL
|
return st.COMPONENT_STATUS_OPERATIONAL
|
||||||
else:
|
else:
|
||||||
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||||
@@ -299,7 +315,7 @@ class HttpStatus(Expectaction):
|
|||||||
return 'Unexpected HTTP status (%s)' % (response.status_code,)
|
return 'Unexpected HTTP status (%s)' % (response.status_code,)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr('HTTP status: %s' % (self.status,))
|
return repr('HTTP status range: %s' % (self.status_range,))
|
||||||
|
|
||||||
|
|
||||||
class Latency(Expectaction):
|
class Latency(Expectaction):
|
||||||
|
|||||||
16
cachet_url_monitor/latency_unit.py
Normal file
16
cachet_url_monitor/latency_unit.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
seconds_per_unit = {"ms": 1000, "milliseconds": 1000, "s": 1, "seconds": 1, "m": float(1) / 60,
|
||||||
|
"minutes": float(1) / 60, "h": float(1) / 3600, "hours": float(1) / 3600}
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_unit(time_unit, value):
|
||||||
|
"""
|
||||||
|
Will convert the given value from seconds to the given time_unit.
|
||||||
|
|
||||||
|
:param time_unit: The time unit to which the value will be converted, from seconds.
|
||||||
|
This is a string parameter and must be one of the keys of `seconds_per_unit` (for example `ms`, `s`, `m` or `h`).
|
||||||
|
:param value: The given value that will be converted. This value must be in seconds.
|
||||||
|
:return: The converted value.
|
||||||
|
"""
|
||||||
|
return value * seconds_per_unit[time_unit]
|
||||||
@@ -1,4 +1,9 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
"""
|
||||||
|
This file defines all the different status values.
|
||||||
|
These are all constants and are coupled to cachet's API configuration.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
COMPONENT_STATUS_OPERATIONAL = 1
|
COMPONENT_STATUS_OPERATIONAL = 1
|
||||||
COMPONENT_STATUS_PERFORMANCE_ISSUES = 2
|
COMPONENT_STATUS_PERFORMANCE_ISSUES = 2
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ endpoint:
|
|||||||
timeout: 0.01
|
timeout: 0.01
|
||||||
expectation:
|
expectation:
|
||||||
- type: HTTP_STATUS
|
- type: HTTP_STATUS
|
||||||
status: 200
|
status_range: 200-300
|
||||||
- type: LATENCY
|
- type: LATENCY
|
||||||
threshold: 1
|
threshold: 1
|
||||||
- type: REGEX
|
- type: REGEX
|
||||||
@@ -19,3 +19,4 @@ cachet:
|
|||||||
- UPDATE_STATUS
|
- UPDATE_STATUS
|
||||||
public_incidents: true
|
public_incidents: true
|
||||||
frequency: 30
|
frequency: 30
|
||||||
|
latency_unit: ms
|
||||||
|
|||||||
@@ -2,10 +2,11 @@
|
|||||||
import sys
|
import sys
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import cachet_url_monitor.status
|
|
||||||
import mock
|
import mock
|
||||||
from requests import ConnectionError, HTTPError, Timeout
|
from requests import ConnectionError, HTTPError, Timeout
|
||||||
|
|
||||||
|
import cachet_url_monitor.status
|
||||||
|
|
||||||
sys.modules['requests'] = mock.Mock()
|
sys.modules['requests'] = mock.Mock()
|
||||||
sys.modules['logging'] = mock.Mock()
|
sys.modules['logging'] = mock.Mock()
|
||||||
from cachet_url_monitor.configuration import Configuration
|
from cachet_url_monitor.configuration import Configuration
|
||||||
@@ -38,7 +39,7 @@ class ConfigurationTest(unittest.TestCase):
|
|||||||
sys.modules['requests'].HTTPError = HTTPError
|
sys.modules['requests'].HTTPError = HTTPError
|
||||||
|
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
self.assertEqual(len(self.configuration.data), 3, 'Configuration data size is incorrect')
|
self.assertEqual(len(self.configuration.data), 4, 'Number of root elements in config.yml is incorrect')
|
||||||
self.assertEquals(len(self.configuration.expectations), 3, 'Number of expectations read from file is incorrect')
|
self.assertEquals(len(self.configuration.expectations), 3, 'Number of expectations read from file is incorrect')
|
||||||
self.assertDictEqual(self.configuration.headers, {'X-Cachet-Token': 'token2'}, 'Header was not set correctly')
|
self.assertDictEqual(self.configuration.headers, {'X-Cachet-Token': 'token2'}, 'Header was not set correctly')
|
||||||
self.assertEquals(self.configuration.api_url, 'https://demo.cachethq.io/api/v1',
|
self.assertEquals(self.configuration.api_url, 'https://demo.cachethq.io/api/v1',
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
import re
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import mock
|
import mock
|
||||||
import re
|
import pytest
|
||||||
|
|
||||||
from cachet_url_monitor.configuration import HttpStatus, Regex
|
from cachet_url_monitor.configuration import HttpStatus, Regex
|
||||||
from cachet_url_monitor.configuration import Latency
|
from cachet_url_monitor.configuration import Latency
|
||||||
|
|
||||||
@@ -51,10 +53,21 @@ class LatencyTest(unittest.TestCase):
|
|||||||
|
|
||||||
class HttpStatusTest(unittest.TestCase):
|
class HttpStatusTest(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.expectation = HttpStatus({'type': 'HTTP_STATUS', 'status': 200})
|
self.expectation = HttpStatus({'type': 'HTTP_STATUS', 'status_range': "200-300"})
|
||||||
|
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
assert self.expectation.status == 200
|
assert self.expectation.status_range == (200, 300)
|
||||||
|
|
||||||
|
def test_init_with_one_status(self):
|
||||||
|
"""With only one value, we still expect a valid tuple"""
|
||||||
|
self.expectation = HttpStatus({'type': 'HTTP_STATUS', 'status_range': "200"})
|
||||||
|
|
||||||
|
assert self.expectation.status_range == (200, 201)
|
||||||
|
|
||||||
|
def test_init_with_invalid_number(self):
|
||||||
|
"""Invalid values should just fail with a ValueError, as we can't convert it to int."""
|
||||||
|
with pytest.raises(ValueError) as excinfo:
|
||||||
|
self.expectation = HttpStatus({'type': 'HTTP_STATUS', 'status_range': "foo"})
|
||||||
|
|
||||||
def test_get_status_healthy(self):
|
def test_get_status_healthy(self):
|
||||||
request = mock.Mock()
|
request = mock.Mock()
|
||||||
|
|||||||
18
tests/test_latency_unit.py
Normal file
18
tests/test_latency_unit.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from cachet_url_monitor.latency_unit import convert_to_unit
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigurationTest(unittest.TestCase):
|
||||||
|
def test_convert_to_unit_ms(self):
|
||||||
|
assert convert_to_unit("ms", 1) == 1000
|
||||||
|
|
||||||
|
def test_convert_to_unit_s(self):
|
||||||
|
assert convert_to_unit("s", 20) == 20
|
||||||
|
|
||||||
|
def test_convert_to_unit_m(self):
|
||||||
|
assert convert_to_unit("m", 3) == float(3) / 60
|
||||||
|
|
||||||
|
def test_convert_to_unit_h(self):
|
||||||
|
assert convert_to_unit("h", 7200) == 2
|
||||||
Reference in New Issue
Block a user