Files
RobustToolbox/Robust.Client/Graphics/Clyde/Clyde.Sprite.cs
Leon Friedrich 3f19d25018 Box Simd (#6193)
* Box Simd

* Add 256 bit version of GetAABB

* Add AABB benchmarks

* No real diff between 128 & 256, so removing 256

| Method     | Mean      | Error     | StdDev    | Ratio |
|----------- |----------:|----------:|----------:|------:|
| GetAABB    | 5.8107 ns | 0.0154 ns | 0.0137 ns |  1.00 |
| GetAABB128 | 0.4927 ns | 0.0003 ns | 0.0002 ns |  0.08 |
| GetAABB256 | 0.4332 ns | 0.0006 ns | 0.0006 ns |  0.07 |

* Add Box2Rotated.Transform Benchmark

* Results

20% faster and much smaller code. Also I don't think it inlined RotateVec

* Add Matrix3x2Helper.TransformBox() benchmark

new:

| Method    | Mean     | Error     | StdDev    | Code Size |
|---------- |---------:|----------:|----------:|----------:|
| Transform | 2.463 ns | 0.0766 ns | 0.0679 ns |     216 B |

old:
| Method    | Mean     | Error     | StdDev    | Median   | Code Size |
|---------- |---------:|----------:|----------:|---------:|----------:|
| Transform | 9.469 ns | 0.2140 ns | 0.5408 ns | 9.206 ns |     621 B |

* Fix polygon constructor

* SlimPolygonBenchmark

* use new SimdHelper for other methods

* Fix bugs

* Use new methods

* Simd SlimPolygon.ComputeAABB

* Move simd transform to physics

* Cleanup

* Remove unnecessary Unsafe.SkipInit

* These tests all work on master

* Add Transform.MulSimd test

* Add SlimPolygon constructor tests

* Add ComputeAABB test

---------

Co-authored-by: metalgearsloth <31366439+metalgearsloth@users.noreply.github.com>
2025-11-10 18:30:08 +11:00

252 lines
10 KiB
C#

using System;
using System.Buffers;
using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Threading.Tasks;
using Robust.Client.ComponentTrees;
using Robust.Client.GameObjects;
using Robust.Shared.GameObjects;
using Robust.Shared.Graphics;
using Robust.Shared.Map;
using Robust.Shared.Maths;
using Robust.Shared.Physics;
using Robust.Shared.Threading;
using Robust.Shared.Utility;
namespace Robust.Client.Graphics.Clyde;
// this partial class contains code specific to querying, processing & sorting sprites.
internal partial class Clyde
{
// Injected dependency; its ParallelProcessCount caps the degree of parallelism used by ProcessSpriteEntities.
[Shared.IoC.Dependency] private readonly IParallelManager _parMan = default!;
// List that GetSprites fills (via ProcessSpriteEntities) and that the sorted index array points into.
private readonly RefList<SpriteData> _drawingSpriteList = new();
// Number of sprites handed to each ProcessSprites call when the work is split across Parallel.For.
private const int _spriteProcessingBatchSize = 25;
/// <summary>
/// Gathers the sprites intersecting <paramref name="worldBounds"/> into <see cref="_drawingSpriteList"/> and
/// produces a draw-ordered array of indices into that list.
/// </summary>
/// <param name="indexList">
/// Array rented from <see cref="ArrayPool{T}.Shared"/>. Only the first <c>_drawingSpriteList.Count</c> entries
/// are populated and sorted; the rented array may be longer than that.
/// </param>
private void GetSprites(MapId map, Viewport view, IEye eye, Box2Rotated worldBounds, out int[] indexList)
{
    ProcessSpriteEntities(map, view, eye, worldBounds, _drawingSpriteList);

    // Sorting plain ints that point into the list is faster than sorting the sprite entries themselves.
    var spriteCount = _drawingSpriteList.Count;
    indexList = ArrayPool<int>.Shared.Rent(spriteCount);

    // Start from the identity ordering...
    var slot = 0;
    while (slot < spriteCount)
    {
        indexList[slot] = slot;
        slot++;
    }

    // ...then reorder the populated prefix into drawing order.
    // TODO better sorting? parallel merge sort?
    var drawOrder = new SpriteDrawingOrderComparer(_drawingSpriteList);
    Array.Sort(indexList, 0, spriteCount, drawOrder);
}
/// <summary>
/// Queries every sprite tree intersecting <paramref name="worldBounds"/>, appends the sprites found to
/// <paramref name="list"/>, and then fills in each new entry's world position, rotation and screen-space
/// bounding box via <see cref="ProcessSprites"/>.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
private void ProcessSpriteEntities(MapId map, Viewport view, IEye eye, Box2Rotated worldBounds, RefList<SpriteData> list)
{
    var xformQuery = _entityManager.GetEntityQuery<TransformComponent>();

    // World units -> pixels, with the y-axis flipped.
    var pixelsPerMeter = new Vector2(EyeManager.PixelsPerMeter, -EyeManager.PixelsPerMeter);
    var viewScale = eye.Scale * view.RenderScale * pixelsPerMeter;

    // View-wide values are set once here; the per-tree values are filled in inside the loop below.
    var batch = new BatchData()
    {
        Sys = _entityManager.EntitySysManager.GetEntitySystem<TransformSystem>(),
        Query = xformQuery,
        ViewRotation = eye.Rotation,
        ViewScale = viewScale,
        PreScaleViewOffset = view.Size / 2f / viewScale,
        ViewPosition = eye.Position.Position + eye.Offset
    };

    // We need to batch the actual tree query, or alternatively we need just get the list of sprites and then
    // parallelize the rotation & bounding box calculations.
    var parallelOptions = new ParallelOptions { MaxDegreeOfParallelism = _parMan.ParallelProcessCount };

    // Index of the first list entry that has not been processed yet.
    var processed = 0;

    foreach (var (treeOwner, comp) in _spriteTreeSystem.GetIntersectingTrees(map, worldBounds))
    {
        var treeXform = xformQuery.GetComponent(treeOwner);
        var treeLocalBounds = _transformSystem.GetInvWorldMatrix(treeOwner).TransformBox(worldBounds);
        DebugTools.Assert(treeXform.MapUid == treeXform.ParentUid || !treeXform.ParentUid.IsValid());

        var treeRotation = treeXform.LocalRotation;
        var treeAngle = (float) treeRotation;
        batch = batch with
        {
            TreeOwner = treeOwner,
            TreePos = treeXform.LocalPosition,
            TreeRot = treeRotation,
            Sin = MathF.Sin(treeAngle),
            Cos = MathF.Cos(treeAngle),
        };

        // Append one entry per sprite found in this tree; the remaining fields are filled in below.
        comp.Tree.QueryAabb(ref list,
            static (ref RefList<SpriteData> sprites, in ComponentTreeEntry<SpriteComponent> treeEntry) =>
            {
                ref var added = ref sprites.AllocAdd();
                added.Uid = treeEntry.Uid;
                added.Sprite = treeEntry.Component;
                added.Xform = treeEntry.Transform;
                return true;
            }, treeLocalBounds, true);

        // Get bounding boxes & world positions
        var newCount = list.Count - processed;

        // Only go parallel when there is more than one full batch; otherwise do everything inline.
        // TODO also do sorting here & use a merge sort later on for y-sorting?
        var parallelBatches = newCount / _spriteProcessingBatchSize;
        if (parallelBatches <= 1)
        {
            parallelBatches = 0;
        }
        else
        {
            Parallel.For(0, parallelBatches, parallelOptions,
                i => ProcessSprites(list, processed + i * _spriteProcessingBatchSize, _spriteProcessingBatchSize, batch));
        }

        // Whatever did not fit into a full parallel batch is processed on this thread.
        var parallelCount = parallelBatches * _spriteProcessingBatchSize;
        var leftover = newCount - parallelCount;
        if (leftover > 0)
            ProcessSprites(list, processed + parallelCount, leftover, batch);

        processed += parallelCount + leftover;
    }
}
/// <summary>
/// Computes each sprite's world position, world rotation, and screen-space bounding box for the entries
/// <paramref name="startIndex"/> .. <paramref name="startIndex"/> + <paramref name="count"/> - 1 of
/// <paramref name="list"/>. The position & rotation are required in general, but the bounding box is only
/// really needed for y-sorting & if the sprite has a post processing shader.
/// </summary>
private void ProcessSprites(
    RefList<SpriteData> list,
    int startIndex,
    int count,
    in BatchData batch)
{
    var end = startIndex + count;
    for (var slot = startIndex; slot < end; slot++)
    {
        ref var entry = ref list[slot];
        DebugTools.Assert(entry.Sprite.Visible);

        // The rest of this loop body should be functionally equivalent to the following three lines of code,
        // but has been expanded & simplified to speed up the calculation:
        //
        // (data.WorldPos, data.WorldRot) = batch.Sys.GetWorldPositionRotation(data.Xform, batch.Query);
        // var spriteWorldBB = data.Sprite.CalculateRotatedBoundingBox(data.WorldPos, data.WorldRot, batch.ViewRotation);
        // data.SpriteScreenBB = Viewport.GetWorldToLocalMatrix().TransformBox(spriteWorldBB);

        // Tree-relative transform -> world transform, using the tree's precomputed sin/cos.
        var (relPos, worldRot) = batch.Sys.GetRelativePositionRotation(entry.Xform, batch.TreeOwner, batch.Query);
        var worldPos = new Vector2(
            batch.TreePos.X + batch.Cos * relPos.X - batch.Sin * relPos.Y,
            batch.TreePos.Y + batch.Sin * relPos.X + batch.Cos * relPos.Y);
        worldRot += batch.TreeRot;

        entry.WorldRot = worldRot;
        entry.WorldPos = worldPos;

        // Sprites flagged NoRotation keep only their own rotation; all others also pick up the entity's
        // world rotation and the view rotation.
        var noRotation = entry.Sprite.NoRotation;
        var drawAngle = entry.Sprite.Rotation;
        if (!noRotation)
            drawAngle = drawAngle + worldRot + batch.ViewRotation;
        var finalRotation = (float) drawAngle;

        var screenPos = worldPos;

        // false for 99.9% of sprites
        if (entry.Sprite.Offset != Vector2.Zero)
        {
            if (noRotation)
                screenPos += (-batch.ViewRotation).RotateVec(entry.Sprite.Offset);
            else
                screenPos += worldRot.RotateVec(entry.Sprite.Offset);
        }

        screenPos = batch.ViewRotation.RotateVec(screenPos - batch.ViewPosition);

        // special casing angle = n*pi/2 to avoid box rotation & bounding calculations doesn't seem to give significant speedups.
        var localBounds = _spriteSystem.GetLocalBounds((entry.Uid, entry.Sprite));
        entry.SpriteScreenBB = TransformCenteredBox(
            localBounds,
            finalRotation,
            screenPos + batch.PreScaleViewOffset,
            batch.ViewScale);
    }
}
/// <summary>
/// Rotates <paramref name="box"/> about the origin by <paramref name="angle"/>, takes the axis-aligned bounds
/// of the rotated corners, then translates those bounds by <paramref name="offset"/> and scales them by
/// <paramref name="scale"/>. This is effectively a specialized version of
/// <see cref="Matrix3Helpers.TransformBox(Matrix3x2, in Box2)"/> for sprite rendering.
/// </summary>
/// <param name="box">Box to transform, expressed relative to the rotation origin.</param>
/// <param name="angle">Rotation in radians (it is passed straight to <see cref="MathF.Sin"/> / <see cref="MathF.Cos"/>).</param>
/// <param name="offset">Translation added to the rotated bounds before scaling.</param>
/// <param name="scale">Scale applied after the offset. The Y component must be negative (asserted in debug builds).</param>
/// <returns>The transformed axis-aligned box.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static Box2 TransformCenteredBox(in Box2 box, float angle, in Vector2 offset, in Vector2 scale)
{
    // Reinterpret the box as four packed floats. The shuffles below assume the lane order is
    // (left, bottom, right, top), as the "lbrt" naming suggests. Box2 is 16 bytes, so this read is in bounds.
    var boxVec = Unsafe.As<Box2, Vector128<float>>(ref Unsafe.AsRef(in box));

    var sin = Vector128.Create(MathF.Sin(angle));
    var cos = Vector128.Create(MathF.Cos(angle));

    // One corner per lane: (l,b), (l,t), (r,t), (r,b).
    var boxX = Vector128.Shuffle(boxVec, Vector128.Create(0, 0, 2, 2));
    var boxY = Vector128.Shuffle(boxVec, Vector128.Create(1, 3, 3, 1));

    // Rotate all four corners at once.
    var x = boxX * cos - boxY * sin;
    var y = boxX * sin + boxY * cos;

    var lbrt = SimdHelpers.GetAABB(x, y);

    // This function is for sprites, which flip the y-axis via the scale, so we need to flip t & b.
    DebugTools.Assert(scale.Y < 0);
    lbrt = Vector128.Shuffle(lbrt, Vector128.Create(0, 3, 2, 1));

    // Widen the 8-byte Vector2 values with AsVector128 (upper lanes zeroed) rather than reinterpreting the
    // reference as a 16-byte vector, which would read 8 bytes past the end of the Vector2.
    var offsetVec = offset.AsVector128();
    var scaleVec = scale.AsVector128();

    // Broadcast (x, y) to (x, y, x, y) so it lines up with (l, b, r, t).
    offsetVec = Vector128.Shuffle(offsetVec, Vector128.Create(0, 1, 0, 1));
    scaleVec = Vector128.Shuffle(scaleVec, Vector128.Create(0, 1, 0, 1));

    // offset and scale box.
    // note that the scaling here is scaling the whole space, not just the box. I.e., the centre of the box is changing
    lbrt = (lbrt + offsetVec) * scaleVec;

    return Unsafe.As<Vector128<float>, Box2>(ref lbrt);
}
/// <summary>
/// Per-sprite entry of the drawing list. The first three fields are filled in by the sprite tree query in
/// ProcessSpriteEntities; the remaining fields are computed afterwards by ProcessSprites.
/// </summary>
private struct SpriteData
{
// Entity the sprite belongs to; also the final tie-breaker when sorting.
public EntityUid Uid;
// The sprite component returned by the tree query.
public SpriteComponent Sprite;
// The transform component returned alongside the sprite by the tree query.
public TransformComponent Xform;
// World position, written by ProcessSprites (before the sprite's own offset is applied).
public Vector2 WorldPos;
// World rotation, written by ProcessSprites.
public Angle WorldRot;
// Screen-space bounding box, written by ProcessSprites; its Top is used for y-sorting.
public Box2 SpriteScreenBB;
}
/// <summary>
/// Values shared by every sprite in one ProcessSprites call. The view-related properties are set once per
/// ProcessSpriteEntities call; the tree-related ones are replaced for each sprite tree.
/// </summary>
private readonly struct BatchData
{
// Transform system used to get each sprite's position/rotation relative to the tree owner.
public TransformSystem Sys { get; init; }
// Transform component query passed through to the transform system.
public EntityQuery<TransformComponent> Query { get; init; }
// The eye's rotation.
public Angle ViewRotation { get; init; }
// eye.Scale * view.RenderScale * (PixelsPerMeter, -PixelsPerMeter); note the negative Y component.
public Vector2 ViewScale { get; init; }
// view.Size / 2 / ViewScale: added to the position before scaling by ViewScale.
public Vector2 PreScaleViewOffset { get; init; }
// eye.Position.Position + eye.Offset.
public Vector2 ViewPosition { get; init; }
// Owner entity of the sprite tree currently being processed.
public EntityUid TreeOwner { get; init; }
// Local position of the tree owner's transform.
public Vector2 TreePos { get; init; }
// Local rotation of the tree owner's transform.
public Angle TreeRot { get; init; }
// Sine of TreeRot, precomputed once per tree.
public float Sin { get; init; }
// Cosine of TreeRot, precomputed once per tree.
public float Cos { get; init; }
}
/// <summary>
/// Orders indices into a sprite list by the drawing order of the sprites they refer to: draw depth first,
/// then render order, then the top of the screen-space bounding box (y-sorting), and finally the entity uid
/// as a tie-breaker.
/// </summary>
private sealed class SpriteDrawingOrderComparer : IComparer<int>
{
    private readonly RefList<SpriteData> _drawList;

    /// <param name="drawList">The list that the compared indices point into.</param>
    public SpriteDrawingOrderComparer(RefList<SpriteData> drawList)
    {
        _drawList = drawList;
    }

    /// <summary>
    /// Compares the sprites stored at indices <paramref name="x"/> and <paramref name="y"/> of the draw list.
    /// </summary>
    public int Compare(int x, int y)
    {
        // Take references instead of copying the whole SpriteData struct twice per comparison.
        ref var a = ref _drawList[x];
        ref var b = ref _drawList[y];

        var cmp = a.Sprite.DrawDepth.CompareTo(b.Sprite.DrawDepth);
        if (cmp != 0)
            return cmp;

        cmp = a.Sprite.RenderOrder.CompareTo(b.Sprite.RenderOrder);
        if (cmp != 0)
            return cmp;

        // compare the top of the sprite's BB for y-sorting. Because screen coordinates are flipped, the "top" of the BB is actually the "bottom".
        cmp = a.SpriteScreenBB.Top.CompareTo(b.SpriteScreenBB.Top);
        if (cmp != 0)
            return cmp;

        return a.Uid.CompareTo(b.Uid);
    }
}
}