Fixed: Delay health check notifications on startup

Closes #4381
This commit is contained in:
Mark McDowall 2022-03-06 21:26:13 -08:00
parent e280897bc7
commit 07f0db477a
3 changed files with 44 additions and 8 deletions

View File

@ -5,10 +5,12 @@ namespace NzbDrone.Core.HealthCheck
public class HealthCheckFailedEvent : IEvent
{
public HealthCheck HealthCheck { get; private set; }
public bool IsInStartupGraceperiod { get; private set; }
public HealthCheckFailedEvent(HealthCheck healthCheck)
public HealthCheckFailedEvent(HealthCheck healthCheck, bool isInStartupGraceperiod)
{
HealthCheck = healthCheck;
IsInStartupGraceperiod = isInStartupGraceperiod;
}
}
}

View File

@ -3,16 +3,12 @@ using System.Collections.Generic;
using System.Linq;
using NLog;
using NzbDrone.Common.Cache;
using NzbDrone.Common.Extensions;
using NzbDrone.Common.EnvironmentInfo;
using NzbDrone.Common.Messaging;
using NzbDrone.Common.Reflection;
using NzbDrone.Core.Configuration.Events;
using NzbDrone.Core.Download;
using NzbDrone.Core.Indexers;
using NzbDrone.Core.Lifecycle;
using NzbDrone.Core.Messaging.Commands;
using NzbDrone.Core.Messaging.Events;
using NzbDrone.Core.ThingiProvider.Events;
namespace NzbDrone.Core.HealthCheck
{
@ -26,6 +22,7 @@ namespace NzbDrone.Core.HealthCheck
IHandleAsync<ApplicationStartedEvent>,
IHandleAsync<IEvent>
{
private readonly DateTime _startupGracePeriodEndTime;
private readonly IProvideHealthCheck[] _healthChecks;
private readonly IProvideHealthCheck[] _startupHealthChecks;
private readonly IProvideHealthCheck[] _scheduledHealthChecks;
@ -36,9 +33,13 @@ namespace NzbDrone.Core.HealthCheck
private readonly ICached<HealthCheck> _healthCheckResults;
private bool _hasRunHealthChecksAfterGracePeriod = false;
private bool _isRunningHealthChecksAfterGracePeriod = false;
public HealthCheckService(IEnumerable<IProvideHealthCheck> healthChecks,
IEventAggregator eventAggregator,
ICacheManager cacheManager,
IRuntimeInfo runtimeInfo,
Logger logger)
{
_healthChecks = healthChecks.ToArray();
@ -51,6 +52,7 @@ namespace NzbDrone.Core.HealthCheck
_startupHealthChecks = _healthChecks.Where(v => v.CheckOnStartup).ToArray();
_scheduledHealthChecks = _healthChecks.Where(v => v.CheckOnSchedule).ToArray();
_eventDrivenHealthChecks = GetEventDrivenHealthChecks();
_startupGracePeriodEndTime = runtimeInfo.StartTime + TimeSpan.FromMinutes(15);
}
public List<HealthCheck> Results()
@ -88,7 +90,7 @@ namespace NzbDrone.Core.HealthCheck
{
if (_healthCheckResults.Find(result.Source.Name) == null)
{
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result));
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, !_hasRunHealthChecksAfterGracePeriod));
}
_healthCheckResults.Set(result.Source.Name, result);
@ -122,6 +124,31 @@ namespace NzbDrone.Core.HealthCheck
return;
}
// If we haven't previously re-run health checks after startup grace period run startup checks again and track so they aren't run again.
// Return early after re-running checks to avoid triggering checks multiple times.
if (!_hasRunHealthChecksAfterGracePeriod && !_isRunningHealthChecksAfterGracePeriod && DateTime.UtcNow > _startupGracePeriodEndTime)
{
_isRunningHealthChecksAfterGracePeriod = true;
PerformHealthCheck(_startupHealthChecks);
// Update after running health checks so new failure notifications aren't sent 2x.
_hasRunHealthChecksAfterGracePeriod = true;
// Explicitly notify for any failed checks since existing failed results would not have sent events.
var results = _healthCheckResults.Values.ToList();
foreach (var result in results)
{
_eventAggregator.PublishEvent(new HealthCheckFailedEvent(result, false));
}
_isRunningHealthChecksAfterGracePeriod = false;
return;
}
IEventDrivenHealthCheck[] checks;
if (!_eventDrivenHealthChecks.TryGetValue(message.GetType(), out checks))
{
@ -143,7 +170,6 @@ namespace NzbDrone.Core.HealthCheck
}
}
// TODO: Add debounce
PerformHealthCheck(filteredChecks.ToArray());

View File

@ -272,6 +272,14 @@ namespace NzbDrone.Core.Notifications
public void Handle(HealthCheckFailedEvent message)
{
// Don't send health check notifications during the start up grace period,
// once that duration expires they they'll be retested and fired off if necessary.
if (message.IsInStartupGraceperiod)
{
return;
}
foreach (var notification in _notificationFactory.OnHealthIssueEnabled())
{
try