Add IMetricsManager.UpdateMetrics system

This callback enables code to update its metrics only when required. Needed this for SS14 since online admin count stats are not something I want to update on an "arbitrary" basis.

Tons of consideration and commenting for how this plays in with stuff like dotnet-counters. Added the metrics.update_interval CVar to act as a fallback for this event when dotnet-counters and such is in use.
This commit is contained in:
Pieter-Jan Briers
2024-03-20 09:17:38 +01:00
parent 28cf7442ce
commit 390f399750
8 changed files with 178 additions and 9 deletions

View File

@@ -39,7 +39,8 @@ END TEMPLATE-->
### New features
*None yet*
* Made a new `IMetricsManager` interface with an `UpdateMetrics` event that can be used to update Prometheus metrics whenever they are scraped.
* Also added a `metrics.update_interval` CVar to go along with this, as a fallback for when metrics are collected by means other than the Prometheus metrics server (e.g. `dotnet-counters`).
### Bugfixes

View File

@@ -89,7 +89,7 @@ namespace Robust.Server
[Dependency] private readonly IWatchdogApi _watchdogApi = default!;
[Dependency] private readonly HubManager _hubManager = default!;
[Dependency] private readonly IScriptHost _scriptHost = default!;
[Dependency] private readonly IMetricsManager _metricsManager = default!;
[Dependency] private readonly IMetricsManagerInternal _metricsManager = default!;
[Dependency] private readonly IPlayerManager _playerManager = default!;
[Dependency] private readonly IRobustMappedStringSerializer _stringSerializer = default!;
[Dependency] private readonly ILocalizationManagerInternal _loc = default!;
@@ -749,6 +749,8 @@ namespace Robust.Server
_hubManager.Heartbeat();
_modLoader.BroadcastUpdate(ModUpdateLevel.FramePostEngine, frameEventArgs);
_metricsManager.FrameUpdate();
}
void IPostInjectInit.PostInject()

View File

@@ -18,13 +18,20 @@ internal sealed partial class MetricsManager
private sealed class ManagedHttpListenerMetricsServer : MetricHandler
{
private readonly ISawmill _sawmill;
private readonly Func<CancellationToken, Task>? _beforeCollect;
private readonly HttpListener _listener;
private readonly CollectorRegistry _registry;
public ManagedHttpListenerMetricsServer(ISawmill sawmill, string host, int port, string url = "metrics/",
CollectorRegistry? registry = null)
public ManagedHttpListenerMetricsServer(
ISawmill sawmill,
string host,
int port,
string url = "metrics/",
CollectorRegistry? registry = null,
Func<CancellationToken, Task>? beforeCollect = null)
{
_sawmill = sawmill;
_beforeCollect = beforeCollect;
_listener = new HttpListener();
_listener.Prefixes.Add($"http://{host}:{port}/{url}");
_registry = registry ?? Metrics.DefaultRegistry;
@@ -57,6 +64,12 @@ internal sealed partial class MetricsManager
{
MetricsEvents.Log.ScrapeStart();
// prometheus-net does have a "before collect" callback of its own.
// But it doesn't get run before stuff like their System.Diagnostics.Metrics integration,
// so I'm just gonna make my own here.
if (_beforeCollect != null)
await _beforeCollect(cancel);
var stream = resp.OutputStream;
// prometheus-net is a terrible library and we have to do all this insanity,
// just to handle the ScrapeFailedException correctly.

View File

@@ -0,0 +1,62 @@
using System;
using System.Threading;
using System.Threading.Tasks;
using Robust.Shared;
using Robust.Shared.Asynchronous;
using Robust.Shared.IoC;
using Robust.Shared.Timing;
namespace Robust.Server.DataMetrics;
internal sealed partial class MetricsManager
{
    //
    // Handles the implementation of the "UpdateMetrics" callback.
    //

    /// <summary>
    /// Raised when subscribers should bring their metrics up to date.
    /// </summary>
    /// <remarks>
    /// Invoked from <see cref="BeforeCollectCallback"/> (marshalled onto the main thread)
    /// and from <see cref="FrameUpdate"/> when the fixed update interval has elapsed.
    /// </remarks>
    public event Action? UpdateMetrics;

    // Interval between fixed updates. TimeSpan.Zero means fixed updates are disabled.
    private TimeSpan _fixedUpdateInterval;

    // Real time at (or after) which the next fixed update is due.
    private TimeSpan _nextFixedUpdate;

    [Dependency] private readonly IGameTiming _gameTiming = default!;

    /// <summary>
    /// Subscribes to <see cref="CVars.MetricsUpdateInterval"/> and (re)schedules the fixed update
    /// whenever the value changes. The handler is also invoked immediately with the current value.
    /// </summary>
    private void InitializeUpdateMetrics()
    {
        _cfg.OnValueChanged(
            CVars.MetricsUpdateInterval,
            seconds =>
            {
                // Only a positive, finite number of seconds is a usable interval.
                // A negative value would otherwise schedule every "next update" in the past
                // (firing the event every frame), and NaN/infinity would make
                // TimeSpan.FromSeconds throw from inside this CVar callback.
                // Treat all of those the same as 0: disabled.
                _fixedUpdateInterval = seconds > 0f && float.IsFinite(seconds)
                    ? TimeSpan.FromSeconds(seconds)
                    : TimeSpan.Zero;

                _nextFixedUpdate = _gameTiming.RealTime + _fixedUpdateInterval;
            },
            true);
    }

    /// <summary>
    /// Runs the fixed-interval fallback: raises <see cref="UpdateMetrics"/> if fixed updates
    /// are enabled and the interval has elapsed since the last fixed update.
    /// </summary>
    public void FrameUpdate()
    {
        // Zero (or anything non-positive, defensively) means the fallback is disabled.
        if (_fixedUpdateInterval <= TimeSpan.Zero)
            return;

        var time = _gameTiming.RealTime;
        if (_nextFixedUpdate > time)
            return;

        // Schedule relative to "now" rather than the previous deadline,
        // so a long stall does not cause a burst of catch-up updates.
        _nextFixedUpdate = time + _fixedUpdateInterval;

        _sawmill.Verbose("Running fixed metrics update");
        UpdateMetrics?.Invoke();
    }

    /// <summary>
    /// Callback handed to the metrics server; raises <see cref="UpdateMetrics"/> on the main thread
    /// and completes once all handlers have run.
    /// </summary>
    /// <param name="cancel">
    /// Cancellation token supplied by the metrics server. It is currently not observed:
    /// once queued, the main-thread callback always runs to completion.
    /// </param>
    private async Task BeforeCollectCallback(CancellationToken cancel)
    {
        // Avoid a round trip through the main thread if nobody is listening.
        if (UpdateMetrics == null)
            return;

        await _taskManager.TaskOnMainThread(() =>
        {
            // Re-check: subscribers may have been removed while this was queued.
            UpdateMetrics?.Invoke();
        });
    }
}

View File

@@ -3,24 +3,50 @@ using System.Diagnostics.Tracing;
using System.Globalization;
using System.Linq;
using System.Threading.Tasks;
using Prometheus;
using Prometheus.DotNetRuntime;
using Prometheus.DotNetRuntime.Metrics.Producers;
using Robust.Shared;
using Robust.Shared.Asynchronous;
using Robust.Shared.Configuration;
using Robust.Shared.GameObjects;
using Robust.Shared.IoC;
using Robust.Shared.Log;
using EventSource = System.Diagnostics.Tracing.EventSource;
#nullable enable
namespace Robust.Server.DataMetrics;
internal sealed partial class MetricsManager : IMetricsManager, IDisposable
/// <summary>
/// Manages OpenTelemetry metrics exposure.
/// </summary>
/// <remarks>
/// <para>
/// If enabled via <see cref="CVars.MetricsEnabled"/>, metrics about the game server are exposed via an HTTP server
/// in an OpenTelemetry-compatible format (Prometheus).
/// </para>
/// <para>
/// Metrics can be added through the types in <c>System.Diagnostics.Metrics</c> or <c>Prometheus</c>.
/// </para>
/// </remarks>
public interface IMetricsManager
{
/// <summary>
/// An event that gets raised on the main thread when complex metrics should be updated.
/// </summary>
/// <remarks>
/// <para>
/// This event is raised on the main thread before a Prometheus collection happens,
/// and also at a fixed interval if <see cref="CVars.MetricsUpdateInterval"/> is set.
/// You can use it to update complex metrics that can't "just" be stuffed into a counter.
/// </para>
/// <para>
/// A Prometheus collection waits for all handlers to finish before it proceeds,
/// and handlers run on the main thread, so handlers should be kept cheap.
/// </para>
/// </remarks>
event Action UpdateMetrics;
}
internal sealed partial class MetricsManager : IMetricsManagerInternal, IDisposable
{
[Dependency] private readonly IConfigurationManager _cfg = default!;
[Dependency] private readonly IEntitySystemManager _entitySystemManager = default!;
[Dependency] private readonly ILogManager _logManager = default!;
[Dependency] private readonly ITaskManager _taskManager = default!;
private bool _initialized;
@@ -55,6 +81,8 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable
{
_cfg.OnValueChanged(cVar, _ => Reload());
}
InitializeUpdateMetrics();
}
private async Task Stop()
@@ -100,7 +128,12 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable
_sawmill.Info("Prometheus metrics enabled, host: {1} port: {0}", port, host);
var sawmill = Logger.GetSawmill("metrics.server");
_metricServer = new ManagedHttpListenerMetricsServer(sawmill, host, port);
_metricServer = new ManagedHttpListenerMetricsServer(
sawmill,
host,
port,
registry: Metrics.DefaultRegistry,
beforeCollect: BeforeCollectCallback);
_metricServer.Start();
if (_cfg.GetCVar(CVars.MetricsRuntime))
@@ -190,7 +223,8 @@ internal sealed partial class MetricsManager : IMetricsManager, IDisposable
}
}
internal interface IMetricsManager
/// <summary>
/// Engine-internal view of the metrics manager: the public <see cref="IMetricsManager"/> surface
/// plus the lifecycle hooks that the server itself drives.
/// </summary>
internal interface IMetricsManagerInternal : IMetricsManager
{
/// <summary>
/// Performs setup of the metrics manager.
/// NOTE(review): presumably called once during server startup; confirm against the caller.
/// </summary>
void Initialize();
/// <summary>
/// Per-frame hook. Raises <see cref="IMetricsManager.UpdateMetrics"/> when the fixed update interval
/// (<see cref="CVars.MetricsUpdateInterval"/>) is enabled and has elapsed.
/// </summary>
void FrameUpdate();
}

View File

@@ -80,6 +80,7 @@ namespace Robust.Server
deps.Register<IWatchdogApi, WatchdogApi>();
deps.Register<IScriptHost, ScriptHost>();
deps.Register<IMetricsManager, MetricsManager>();
deps.Register<IMetricsManagerInternal, MetricsManager>();
deps.Register<IAuthManager, AuthManager>();
deps.Register<HubManager, HubManager>();
deps.Register<IRobustSerializer, ServerRobustSerializer>();

View File

@@ -74,4 +74,31 @@ namespace Robust.Shared.Asynchronous
/// </remarks>
void BlockWaitOnTask(Task task);
}
internal static class TaskManagerExt
{
    /// <summary>
    /// Run a callback on the main thread, returning a task that represents its completion.
    /// </summary>
    /// <param name="taskManager">The task manager used to queue the callback onto the main thread.</param>
    /// <param name="callback">The callback to run on the main thread.</param>
    /// <returns>
    /// A task that completes after <paramref name="callback"/> has run.
    /// If the callback throws, the task faults with that exception.
    /// </returns>
    /// <remarks>
    /// Continuations of the returned task are never run inline on the main thread:
    /// code that awaits this from a background thread resumes off the main thread.
    /// </remarks>
    /// <seealso cref="ITaskManager.RunOnMainThread"/>
    public static Task TaskOnMainThread(this ITaskManager taskManager, Action callback)
    {
        // Without RunContinuationsAsynchronously, completing the source would execute the awaiter's
        // continuation synchronously inside the main-thread callback, dragging whatever the
        // background caller does next (e.g. a metrics scrape) onto the main thread.
        var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);

        taskManager.RunOnMainThread(() =>
        {
            try
            {
                callback();
            }
            catch (Exception e)
            {
                // Surface the failure to the awaiting caller instead of the main loop.
                tcs.TrySetException(e);
                return;
            }

            tcs.TrySetResult();
        });

        return tcs.Task;
    }
}
}

View File

@@ -416,6 +416,35 @@ namespace Robust.Shared
public static readonly CVarDef<int> MetricsPort =
CVarDef.Create("metrics.port", 44880, CVar.SERVERONLY);
/// <summary>
/// Sets a fixed interval (seconds) for internal collection of certain metrics,
/// when not using the Prometheus metrics server. Defaults to 0 (disabled).
/// </summary>
/// <remarks>
/// <para>
/// Most metrics are internally implemented directly via the prometheus-net library.
/// These metrics can only be scraped by the Prometheus metrics server (<see cref="MetricsEnabled"/>).
/// However, newer metrics are implemented with the <c>System.Diagnostics.Metrics</c> library in the .NET runtime.
/// These metrics can be scraped through more means, such as <c>dotnet counters</c>.
/// </para>
/// <para>
/// While many metrics are simple counters that can "just" be reported,
/// some metrics require more advanced internal work and need some code to be run internally
/// before their values are made current. When collecting metrics via a
/// method other than the Prometheus metrics server, these metrics pose a problem,
/// as there is no way for the game to properly update them before collection.
/// </para>
/// <para>
/// This CVar acts as a fallback: if set to a value other than 0 (disabled),
/// these metrics will be internally updated at the interval provided.
/// </para>
/// <para>
/// This does not need to be enabled if metrics are collected exclusively via the Prometheus metrics server.
/// </para>
/// </remarks>
public static readonly CVarDef<float> MetricsUpdateInterval =
CVarDef.Create("metrics.update_interval", 0f, CVar.SERVERONLY);
/// <summary>
/// Enable detailed runtime metrics. Empty to disable.
/// </summary>