From 390f3997505f58f100123d0e341db5c35e8bc6e0 Mon Sep 17 00:00:00 2001 From: Pieter-Jan Briers Date: Wed, 20 Mar 2024 09:17:38 +0100 Subject: [PATCH] Add IMetricsManager.UpdateMetrics system This callback enables code to update its metrics only when required. Needed this for SS14 since online admin count stats are not something I want to update on an "arbitrary" basis. A lot of consideration and commenting went into how this interacts with tools like dotnet-counters. Added the metrics.update_interval CVar to act as a fallback for this event when dotnet-counters or similar tools are in use. --- RELEASE-NOTES.md | 3 +- Robust.Server/BaseServer.cs | 4 +- .../MetricsManager.MetricsServer.cs | 17 ++++- .../MetricsManager.UpdateMetrics.cs | 62 +++++++++++++++++++ Robust.Server/DataMetrics/MetricsManager.cs | 44 +++++++++++-- Robust.Server/ServerIoC.cs | 1 + Robust.Shared/Asynchronous/TaskManager.cs | 27 ++++++++ Robust.Shared/CVars.cs | 29 +++++++++ 8 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 Robust.Server/DataMetrics/MetricsManager.UpdateMetrics.cs diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index b65bee19a..01e0f6329 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -39,7 +39,8 @@ END TEMPLATE--> ### New features -*None yet* +* Added a new `IMetricsManager` interface with an `UpdateMetrics` event that can be used to update Prometheus metrics whenever they are scraped. + * Also added a `metrics.update_interval` CVar to go along with this, for when metrics are collected by means other than the Prometheus metrics server (e.g. `dotnet-counters`).
### Bugfixes diff --git a/Robust.Server/BaseServer.cs b/Robust.Server/BaseServer.cs index f8e92bc7c..8bc2cf55f 100644 --- a/Robust.Server/BaseServer.cs +++ b/Robust.Server/BaseServer.cs @@ -89,7 +89,7 @@ namespace Robust.Server [Dependency] private readonly IWatchdogApi _watchdogApi = default!; [Dependency] private readonly HubManager _hubManager = default!; [Dependency] private readonly IScriptHost _scriptHost = default!; - [Dependency] private readonly IMetricsManager _metricsManager = default!; + [Dependency] private readonly IMetricsManagerInternal _metricsManager = default!; [Dependency] private readonly IPlayerManager _playerManager = default!; [Dependency] private readonly IRobustMappedStringSerializer _stringSerializer = default!; [Dependency] private readonly ILocalizationManagerInternal _loc = default!; @@ -749,6 +749,8 @@ namespace Robust.Server _hubManager.Heartbeat(); _modLoader.BroadcastUpdate(ModUpdateLevel.FramePostEngine, frameEventArgs); + + _metricsManager.FrameUpdate(); } void IPostInjectInit.PostInject() diff --git a/Robust.Server/DataMetrics/MetricsManager.MetricsServer.cs b/Robust.Server/DataMetrics/MetricsManager.MetricsServer.cs index 958a3e903..35c6abbb6 100644 --- a/Robust.Server/DataMetrics/MetricsManager.MetricsServer.cs +++ b/Robust.Server/DataMetrics/MetricsManager.MetricsServer.cs @@ -18,13 +18,20 @@ internal sealed partial class MetricsManager private sealed class ManagedHttpListenerMetricsServer : MetricHandler { private readonly ISawmill _sawmill; + private readonly Func? _beforeCollect; private readonly HttpListener _listener; private readonly CollectorRegistry _registry; - public ManagedHttpListenerMetricsServer(ISawmill sawmill, string host, int port, string url = "metrics/", - CollectorRegistry? registry = null) + public ManagedHttpListenerMetricsServer( + ISawmill sawmill, + string host, + int port, + string url = "metrics/", + CollectorRegistry? registry = null, + Func? 
beforeCollect = null) { _sawmill = sawmill; + _beforeCollect = beforeCollect; _listener = new HttpListener(); _listener.Prefixes.Add($"http://{host}:{port}/{url}"); _registry = registry ?? Metrics.DefaultRegistry; @@ -57,6 +64,12 @@ internal sealed partial class MetricsManager { MetricsEvents.Log.ScrapeStart(); + // prometheus-net does have a "before collect" callback of its own, + // but it does not get run before things like their System.Diagnostics.Metrics integration, + // so this server awaits its own optional callback here instead. + if (_beforeCollect != null) + await _beforeCollect(cancel); + var stream = resp.OutputStream; // prometheus-net is a terrible library and have to do all this insanity, // just to handle the ScrapeFailedException correctly. diff --git a/Robust.Server/DataMetrics/MetricsManager.UpdateMetrics.cs b/Robust.Server/DataMetrics/MetricsManager.UpdateMetrics.cs new file mode 100644 index 000000000..d91ffa1b8 --- /dev/null +++ b/Robust.Server/DataMetrics/MetricsManager.UpdateMetrics.cs @@ -0,0 +1,62 @@ +using System; +using System.Threading; +using System.Threading.Tasks; +using Robust.Shared; +using Robust.Shared.Asynchronous; +using Robust.Shared.IoC; +using Robust.Shared.Timing; + +namespace Robust.Server.DataMetrics; + +internal sealed partial class MetricsManager +{ + // + // Handles the implementation of the "UpdateMetrics" callback. + // + + public event Action? 
UpdateMetrics; + + private TimeSpan _fixedUpdateInterval; + private TimeSpan _nextFixedUpdate; + + [Dependency] private readonly IGameTiming _gameTiming = default!; + + private void InitializeUpdateMetrics() + { + _cfg.OnValueChanged( + CVars.MetricsUpdateInterval, + seconds => + { + _fixedUpdateInterval = TimeSpan.FromSeconds(seconds); + _nextFixedUpdate = _gameTiming.RealTime + _fixedUpdateInterval; + }, + true); + } + + public void FrameUpdate() + { + if (_fixedUpdateInterval <= TimeSpan.Zero) // Zero disables the fixed update; a negative interval would otherwise pass the check below on every call. + return; + + var time = _gameTiming.RealTime; + + if (_nextFixedUpdate > time) + return; + + _nextFixedUpdate = time + _fixedUpdateInterval; + + _sawmill.Verbose("Running fixed metrics update"); + UpdateMetrics?.Invoke(); + } + + private async Task BeforeCollectCallback(CancellationToken cancel) + { + if (UpdateMetrics == null) + return; + + await _taskManager.TaskOnMainThread(() => + { + UpdateMetrics?.Invoke(); + }); + } +} diff --git a/Robust.Server/DataMetrics/MetricsManager.cs b/Robust.Server/DataMetrics/MetricsManager.cs index 29c0d29d1..2dbbb661b 100644 --- a/Robust.Server/DataMetrics/MetricsManager.cs +++ b/Robust.Server/DataMetrics/MetricsManager.cs @@ -3,24 +3,50 @@ using System.Diagnostics.Tracing; using System.Globalization; using System.Linq; using System.Threading.Tasks; +using Prometheus; using Prometheus.DotNetRuntime; using Prometheus.DotNetRuntime.Metrics.Producers; using Robust.Shared; +using Robust.Shared.Asynchronous; using Robust.Shared.Configuration; using Robust.Shared.GameObjects; using Robust.Shared.IoC; using Robust.Shared.Log; using EventSource = System.Diagnostics.Tracing.EventSource; -#nullable enable - namespace Robust.Server.DataMetrics; -internal sealed partial class MetricsManager : IMetricsManager, IDisposable +/// +/// Manages OpenTelemetry metrics exposure. +/// +/// +/// +/// If enabled via , metrics about the game server are exposed via an HTTP server +/// in an OpenTelemetry-compatible format (Prometheus).
+/// +/// +/// Metrics can be added through the types in System.Diagnostics.Metrics or Prometheus. +/// +/// +public interface IMetricsManager +{ + /// + /// An event that gets raised on the main thread when complex metrics should be updated. + /// + /// + /// This event is raised on the main thread before a Prometheus collection happens, + /// and also with a fixed interval if is set. + /// You can use it to update complex metrics that can't "just" be stuffed into a counter. + /// + event Action UpdateMetrics; +} + +internal sealed partial class MetricsManager : IMetricsManagerInternal, IDisposable { [Dependency] private readonly IConfigurationManager _cfg = default!; [Dependency] private readonly IEntitySystemManager _entitySystemManager = default!; [Dependency] private readonly ILogManager _logManager = default!; + [Dependency] private readonly ITaskManager _taskManager = default!; private bool _initialized; @@ -55,6 +81,8 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable { _cfg.OnValueChanged(cVar, _ => Reload()); } + + InitializeUpdateMetrics(); } private async Task Stop() @@ -100,7 +128,12 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable _sawmill.Info("Prometheus metrics enabled, host: {1} port: {0}", port, host); var sawmill = Logger.GetSawmill("metrics.server"); - _metricServer = new ManagedHttpListenerMetricsServer(sawmill, host, port); + _metricServer = new ManagedHttpListenerMetricsServer( + sawmill, + host, + port, + registry: Metrics.DefaultRegistry, + beforeCollect: BeforeCollectCallback); _metricServer.Start(); if (_cfg.GetCVar(CVars.MetricsRuntime)) @@ -190,7 +223,8 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable } } -internal interface IMetricsManager +internal interface IMetricsManagerInternal : IMetricsManager { void Initialize(); + void FrameUpdate(); } diff --git a/Robust.Server/ServerIoC.cs b/Robust.Server/ServerIoC.cs index 8d9ca24fc..8b0710573 100644 
--- a/Robust.Server/ServerIoC.cs +++ b/Robust.Server/ServerIoC.cs @@ -80,6 +80,7 @@ namespace Robust.Server deps.Register(); deps.Register(); deps.Register(); + deps.Register(); deps.Register(); deps.Register(); deps.Register(); diff --git a/Robust.Shared/Asynchronous/TaskManager.cs b/Robust.Shared/Asynchronous/TaskManager.cs index 2662a1ace..709b46df5 100644 --- a/Robust.Shared/Asynchronous/TaskManager.cs +++ b/Robust.Shared/Asynchronous/TaskManager.cs @@ -74,4 +74,31 @@ namespace Robust.Shared.Asynchronous /// void BlockWaitOnTask(Task task); } + + internal static class TaskManagerExt + { + /// + /// Run a callback on the main thread, returning a task that represents its completion. The task faults if the callback throws. Continuations are scheduled asynchronously, so awaiting code does not resume inline on the thread that ran the callback. + /// + /// + public static Task TaskOnMainThread(this ITaskManager taskManager, Action callback) + { + var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + + taskManager.RunOnMainThread(() => + { + try + { + callback(); + tcs.SetResult(); + } + catch (Exception e) + { + tcs.TrySetException(e); + } + }); + + return tcs.Task; + } + } } diff --git a/Robust.Shared/CVars.cs b/Robust.Shared/CVars.cs index 7f715bf91..c06211b57 100644 --- a/Robust.Shared/CVars.cs +++ b/Robust.Shared/CVars.cs @@ -416,6 +416,35 @@ namespace Robust.Shared public static readonly CVarDef MetricsPort = CVarDef.Create("metrics.port", 44880, CVar.SERVERONLY); + /// + /// Sets a fixed interval (seconds) for internal collection of certain metrics, + /// when not using the Prometheus metrics server. + /// + /// + /// + /// Most metrics are internally implemented directly via the prometheus-net library. + /// These metrics can only be scraped by the Prometheus metrics server (). + /// However, newer metrics are implemented with the System.Diagnostics.Metrics library in the .NET runtime. + /// These metrics can be scraped through more means, such as dotnet counters.
+ /// + /// + /// While many metrics are simple counters that can "just" be reported, + /// some metrics require more advanced internal work and need some code to be run internally + /// before their values are made current. When collecting metrics via a + /// method other than the Prometheus metrics server, these metrics pose a problem, + /// as there is no way for the game to properly update them before collection. + /// + /// + /// This CVar acts as a fallback: if set to a value other than 0 (disabled), + /// these metrics will be internally updated at the interval provided. + /// + /// + /// This does not need to be enabled if metrics are collected exclusively via the Prometheus metrics server. + /// + /// + public static readonly CVarDef MetricsUpdateInterval = + CVarDef.Create("metrics.update_interval", 0f, CVar.SERVERONLY); + /// /// Enable detailed runtime metrics. Empty to disable. ///