我用下面这个实现来做健康监控:它允许用户指定回调,支持在后台线程中运行,并且同时支持主动监控和被动监控:
https://gist.github.com/earonesty/4ccf8fc9bde6feac30e5c155e54dfa5f
代码粘贴在下面,但省略了测试(测试代码比实现代码还多):
class MonitorInstance:
    """Track the error state of a single health check.

    The instance counts the errors recorded since the last success and
    reports state transitions to ``parent`` through ``parent._ok(self)`` and
    ``parent._error(self)``.
    """

    def __init__(self, parent, label, func, threshold, active, metric):
        """Store the check configuration; the check itself is not run here.

        Args:
            parent: Object exposing ``_ok(inst)`` and ``_error(inst)``.
            label: Name used in the log message when ``func`` fails.
            func: Zero-argument callable; raising ``Exception`` counts as a
                failure, returning normally counts as a success.
            threshold: Error count at which ``healthy`` becomes false.
            active: Stored as given; not read by this class.
            metric: Optional object with ``set(value)`` and ``inc()``
                methods; a falsy value disables metric updates.
        """
        self.parent = parent
        self.label = label
        self.func = func
        self.threshold = threshold
        self.active = active
        self.metric = metric
        # None means "no result recorded yet"; afterwards this is the number
        # of errors recorded since the last success.
        self.__errors = None

    def ok(self):
        """Record a success and reset the error count to zero."""
        # Notify the parent only on a transition: the very first result
        # (None) or a recovery after one or more errors.
        if self.__errors is None or self.__errors:
            self.parent._ok(self)
        self.__errors = 0
        if self.metric:
            self.metric.set(0)

    def error(self):
        """Record a failure and increment the error count."""
        # Notify the parent only on the first error after a success, or when
        # the very first recorded result is an error.
        if not self.__errors:
            self.parent._error(self)
        self.__errors = (self.__errors or 0) + 1
        if self.metric:
            self.metric.inc()

    def check(self):
        """Run ``func`` once and record the outcome via ``ok()`` or ``error()``.

        Any ``Exception`` raised inside the ``try`` body (by ``func`` or by
        ``ok()``) is logged and recorded as an error instead of propagating.
        """
        try:
            self.func()
            self.ok()
        except Exception as e:
            # `log` is expected to be a module-level logger defined elsewhere
            # in this file.
            log.error("%s error: %s", self.label, e)
            self.error()

    @property
    def healthy(self):
        """Whether the error count is below ``threshold``.

        Returns ``False`` while no result has been recorded yet, instead of
        failing on a ``None < threshold`` comparison.
        """
        if self.__errors is None:
            return False
        return self.__errors < self.threshold
# Default `threshold` for Monitor.add: the error count at which
# MonitorInstance.healthy becomes false.
DEFAULT_THRESHOLD = 1
# Default `check_secs` for Monitor: the interval, in seconds, used by
# Monitor.check's time comparison and by the background thread's sleep.
DEFAULT_CHECKSECS = 5
class Monitor:
    """Aggregate several health checks into one overall healthy/unhealthy state.

    Checks are registered with ``add()``. The monitor keeps the set of checks
    currently in error (``alerts``) and calls ``health_callback`` with
    ``True``/``False`` whenever the overall ``healthy`` flag flips.

    NOTE: no locking is performed. With ``use_thread=True`` the background
    thread and any caller of ``check()`` touch the same state unsynchronised.
    """

    def __init__(self, health_callback=None, check_secs=DEFAULT_CHECKSECS, use_thread=False):
        """Create an empty monitor.

        Args:
            health_callback: Optional callable taking one bool; invoked when
                the overall health changes.
            check_secs: Minimum number of seconds between two non-forced
                ``check()`` passes, and the sleep time of the background
                thread.
            use_thread: When true, start a daemon thread that runs a check
                pass every ``check_secs`` seconds.

        Raises:
            AssertionError: If ``use_thread`` is true and ``check_secs`` is
                not positive (only when assertions are enabled).
        """
        self.active = []       # instances re-checked on every pass
        self.alerts = set()    # instances currently reporting errors
        self.health_callback = health_callback
        self.healthy = False   # stays False until some instance reports ok
        self.check_secs = check_secs
        self.last_check = 0    # time.time() of the most recent check pass
        if use_thread:
            # Kept as an assert so the exception type callers see is unchanged.
            assert self.check_secs > 0, "threads need to sleep"
            threading.Thread(target=self._thread_loop, daemon=True).start()

    def add(self, label, check, threshold=DEFAULT_THRESHOLD, active=False, metric=None):
        """Register a check, run it once immediately, and return its instance.

        Args:
            label: Name of the check, used in log messages.
            check: Zero-argument callable that raises on failure.
            threshold: Passed to ``MonitorInstance`` as its ``threshold``.
            active: When true, the check is re-run on every ``check()`` pass;
                otherwise it is re-run only while it is in ``alerts``.
            metric: Optional metric object passed to ``MonitorInstance``.

        Returns:
            The new ``MonitorInstance``; callers can also drive it directly
            through its ``ok()`` / ``error()`` methods.
        """
        inst = MonitorInstance(self, label, check, threshold, active, metric)
        if active:
            self.active.append(inst)
        inst.check()
        return inst

    def _error(self, inst):
        """Mark ``inst`` as alerting; fire the callback if health just dropped."""
        self.alerts.add(inst)
        if self.healthy:
            self._callback(False)
        self.healthy = False

    def _thread_loop(self):
        """Background loop: run a check pass, sleep ``check_secs``, repeat."""
        while True:
            # The sleep below is this loop's pacing, so bypass the throttle;
            # the pass still updates last_check for other callers.
            self.check(force=True)
            time.sleep(self.check_secs)

    def _callback(self, value):
        """Invoke ``health_callback(value)``; log instead of raising on failure."""
        if self.health_callback is None:
            return
        try:
            self.health_callback(value)
        except Exception:
            # A faulty callback must not break monitoring, but interpreter
            # exit signals (KeyboardInterrupt, SystemExit) are not swallowed.
            log.exception("deadlyexes: error calling %s", self.health_callback)

    def _ok(self, inst):
        """Clear ``inst`` from alerts; fire the callback once no alerts remain."""
        self.alerts.discard(inst)
        if not self.healthy and not self.alerts:
            self._callback(True)
            self.healthy = True

    def check(self, force=False):
        """Re-run every alerting and every active check.

        Args:
            force: When true, run the pass even if fewer than ``check_secs``
                seconds have elapsed since the previous pass.

        Returns:
            ``True`` if at least one instance was checked, ``False`` if the
            pass was throttled or there was nothing to check.
        """
        now = time.time()
        if not force and now < self.last_check + self.check_secs:
            return False
        # Record the pass so the check_secs throttle above takes effect.
        self.last_check = now

        # Snapshot first: inst.check() mutates self.alerts while we iterate.
        # Active instances that are already alerting are listed only once so
        # one pass never counts the same failure twice.
        alerting = set(self.alerts)
        pending = list(alerting)
        pending.extend(inst for inst in self.active if inst not in alerting)

        for inst in pending:
            try:
                inst.check()
            except Exception:
                # Best effort: one broken instance must not stop the pass.
                log.exception("error checking %s", inst.label)
        return bool(pending)