Files
RobustToolbox/Robust.Shared.Maths/SimdHelpers.cs
Leon Friedrich 3f19d25018 Box Simd (#6193)
* Box Simd

* Add 256 bit version of GetAABB

* Add AABB benchmarks

* No real diff between 128 & 256, so removing 256

| Method     | Mean      | Error     | StdDev    | Ratio |
|----------- |----------:|----------:|----------:|------:|
| GetAABB    | 5.8107 ns | 0.0154 ns | 0.0137 ns |  1.00 |
| GetAABB128 | 0.4927 ns | 0.0003 ns | 0.0002 ns |  0.08 |
| GetAABB256 | 0.4332 ns | 0.0006 ns | 0.0006 ns |  0.07 |

* Add Box2Rotated.Transform Benchmark

* Results

20% faster and much smaller code. Also I don't think it inlined RotateVec

* Add Matrix3x2Helper.TransformBox() benchmark

new:

| Method    | Mean     | Error     | StdDev    | Code Size |
|---------- |---------:|----------:|----------:|----------:|
| Transform | 2.463 ns | 0.0766 ns | 0.0679 ns |     216 B |

old:
| Method    | Mean     | Error     | StdDev    | Median   | Code Size |
|---------- |---------:|----------:|----------:|---------:|----------:|
| Transform | 9.469 ns | 0.2140 ns | 0.5408 ns | 9.206 ns |     621 B |

* Fix polygon constructor

* SlimPolygonBenchmark

* use new SimdHelper for other methods

* Fix bugs

* Use new methods

* Simd SlimPolygon.ComputeAABB

* Move simd transform to physics

* Cleanup

* Remove unnecessary Unsafe.SkipInit

* These tests all work on master

* Add Transform.MulSimd test

* Add SlimPolygon constructor tests

* Add ComputeAABB test

---------

Co-authored-by: metalgearsloth <31366439+metalgearsloth@users.noreply.github.com>
2025-11-10 18:30:08 +11:00

160 lines
6.2 KiB
C#

using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace Robust.Shared.Maths
{
/// <summary>
/// Helper stuff for SIMD code.
/// </summary>
internal static class SimdHelpers
{
/// <summary>
/// Horizontal minimum: reduces the four lanes of <paramref name="v"/> to their smallest value.
/// </summary>
/// <param name="v">The four values to reduce.</param>
/// <returns>The min value is broadcast to the whole vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MinHorizontal128(Vector128<float> v)
{
    // Step 1: swap neighbouring lanes ([v1, v0, v3, v2]) and reduce each pair.
    Vector128<float> neighbours = Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2));
    Vector128<float> pairMin = Vector128.Min(neighbours, v);

    // Step 2: swap the two halves so every lane also sees the other pair's result.
    Vector128<float> otherHalf = Vector128.Shuffle(pairMin, Vector128.Create(2, 3, 0, 1));
    return Vector128.Min(otherHalf, pairMin);
}
/// <summary>
/// Horizontal maximum: reduces the four lanes of <paramref name="v"/> to their largest value.
/// </summary>
/// <param name="v">The four values to reduce.</param>
/// <returns>The max value is broadcast to the whole vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MaxHorizontal128(Vector128<float> v)
{
    // Step 1: swap neighbouring lanes ([v1, v0, v3, v2]) and reduce each pair.
    Vector128<float> neighbours = Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2));
    Vector128<float> pairMax = Vector128.Max(neighbours, v);

    // Step 2: swap the two halves so every lane also sees the other pair's result.
    Vector128<float> otherHalf = Vector128.Shuffle(pairMax, Vector128.Create(2, 3, 0, 1));
    return Vector128.Max(otherHalf, pairMax);
}
/// <summary>
/// Horizontal add: sums the four lanes of <paramref name="v"/>.
/// </summary>
/// <param name="v">The four values to sum.</param>
/// <returns>The added value is broadcast to the whole vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> AddHorizontal128(Vector128<float> v)
{
    // Step 1: add each lane to its neighbour -> [v0+v1, v0+v1, v2+v3, v2+v3].
    Vector128<float> neighbours = Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2));
    Vector128<float> pairSum = neighbours + v;

    // Step 2: add the other half's pair sum so every lane holds the full total.
    Vector128<float> otherHalf = Vector128.Shuffle(pairSum, Vector128.Create(2, 3, 0, 1));
    return otherHalf + pairSum;
}
/// <summary>
/// Horizontal add: sums the eight lanes of <paramref name="v"/>.
/// </summary>
/// <param name="v">The eight values to sum.</param>
/// <returns>The added value is broadcast to the whole vector.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> AddHorizontal256(Vector256<float> v)
{
    // Step 1: add each lane to its immediate neighbour.
    Vector256<float> neighbours = Vector256.Shuffle(v, Vector256.Create(1, 0, 3, 2, 5, 4, 7, 6));
    Vector256<float> pairSum = neighbours + v;

    // Step 2: add the other pair inside each group of four lanes.
    Vector256<float> otherPair = Vector256.Shuffle(pairSum, Vector256.Create(2, 3, 0, 1, 6, 7, 4, 5));
    Vector256<float> quadSum = otherPair + pairSum;

    // Step 3: swap the lower and upper four lanes and add, giving the total in every lane.
    Vector256<float> otherQuad = Vector256.Shuffle(quadSum, Vector256.Create(4, 5, 6, 7, 0, 1, 2, 3));
    return quadSum + otherQuad;
}
#region GetAABB
/// <summary>
/// This computes the bounding box given a set of 4 coordinates specified via 2 simd vectors.
/// This effectively computes the horizontal min and max of both of the given vectors.
/// </summary>
/// <remarks>
/// Returns a simd vector that can be directly cast to a <see cref="Box2"/>.
/// Dispatches to <see cref="GetAABBAvx"/> when <see cref="Avx.IsSupported"/> is true and to
/// <see cref="GetAABBSlow"/> otherwise.
/// </remarks>
/// <param name="x">The x components of the four coordinates.</param>
/// <param name="y">The y components of the four coordinates.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> GetAABB(Vector128<float> x, Vector128<float> y)
{
    if (!Avx.IsSupported)
    {
        return GetAABBSlow(x, y);
    }

    return GetAABBAvx(x, y);
}
/// <summary>
/// This computes the bounding box given a set of 4 coordinates specified via 2 simd vectors.
/// This effectively computes the horizontal min and max of both of the given vectors.
/// </summary>
/// <remarks>
/// Implemented with <see cref="Sse"/>, <see cref="Sse41"/> and <see cref="Avx"/> intrinsics;
/// <see cref="GetAABB"/> only dispatches here when <see cref="Avx.IsSupported"/> is true.
/// </remarks>
/// <param name="x">x = [x0, x1, x2, x3]</param>
/// <param name="y">y = [y0, y1, y2, y3]</param>
/// <returns>[min(x0,x1,x2,x3), min(y0,y1,y2,y3), max(x0,x1,x2,x3), max(y0,y1,y2,y3)]</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> GetAABBAvx(Vector128<float> x, Vector128<float> y)
{
    // This can be turned into a 256 bit version that only needs 4 min/max instead of 6
    // But the performance difference seems negligible.

    // Neighbour-swapped copies of the inputs, shared by the min and the max halves below.
    // xSwapped = [x1, x0, x3, x2]
    // ySwapped = [y1, y0, y3, y2]
    Vector128<float> xSwapped = Vector128.Shuffle(x, Vector128.Create(1, 0, 3, 2));
    Vector128<float> ySwapped = Vector128.Shuffle(y, Vector128.Create(1, 0, 3, 2));

    // ---- Lower bounds ----
    // pairMinX = [min(x0,x1), min(x0,x1), min(x2,x3), min(x2,x3)]
    Vector128<float> pairMinX = Sse.Min(xSwapped, x);
    // pairMinY = [min(y0,y1), min(y0,y1), min(y2,y3), min(y2,y3)]
    Vector128<float> pairMinY = Sse.Min(ySwapped, y);
    // Take lanes 1 and 3 from the y vector:
    // interleavedMin = [min(x0,x1), min(y0,y1), min(x2,x3), min(y2,y3)]
    Vector128<float> interleavedMin = Sse41.Blend(pairMinX, pairMinY, 0b_1_0_1_0);
    // Bring the upper two lanes down:
    // upperMinInLower = [min(x2,x3), min(y2,y3), ..., ...]
    Vector128<float> upperMinInLower = Avx.Permute(interleavedMin, 0b_00_00_11_10);
    // lowerBounds = [min(x0,x1,x2,x3), min(y0,y1,y2,y3), ..., ...]
    Vector128<float> lowerBounds = Sse.Min(interleavedMin, upperMinInLower);

    // ---- Upper bounds ----
    // pairMaxX = [max(x0,x1), max(x0,x1), max(x2,x3), max(x2,x3)]
    Vector128<float> pairMaxX = Sse.Max(xSwapped, x);
    // pairMaxY = [max(y0,y1), max(y0,y1), max(y2,y3), max(y2,y3)]
    Vector128<float> pairMaxY = Sse.Max(ySwapped, y);
    // interleavedMax = [max(x0,x1), max(y0,y1), max(x2,x3), max(y2,y3)]
    Vector128<float> interleavedMax = Sse41.Blend(pairMaxX, pairMaxY, 0b_1_0_1_0);
    // Bring the lower two lanes up:
    // lowerMaxInUpper = [..., ..., max(x0,x1), max(y0,y1)]
    Vector128<float> lowerMaxInUpper = Avx.Permute(interleavedMax, 0b_01_00_00_00);
    // upperBounds = [..., ..., max(x0,x1,x2,x3), max(y0,y1,y2,y3)]
    Vector128<float> upperBounds = Sse.Max(interleavedMax, lowerMaxInUpper);

    // Lower two lanes from lowerBounds, upper two lanes from upperBounds:
    // result = [min(x0,x1,x2,x3), min(y0,y1,y2,y3), max(x0,x1,x2,x3), max(y0,y1,y2,y3)]
    return Sse41.Blend(lowerBounds, upperBounds, 0b_1_1_0_0);
}
/// <summary>
/// Computes the bounding box of 4 coordinates using the horizontal min/max helpers:
/// the min and max of each input are reduced separately and then merged into one vector.
/// </summary>
/// <param name="x">The x components of the four coordinates.</param>
/// <param name="y">The y components of the four coordinates.</param>
/// <returns>[min of x, min of y, max of x, max of y]</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> GetAABBSlow(Vector128<float> x, Vector128<float> y)
{
    // Each reduction broadcasts its result to all four lanes.
    Vector128<float> left = MinHorizontal128(x);
    Vector128<float> bottom = MinHorizontal128(y);
    Vector128<float> right = MaxHorizontal128(x);
    Vector128<float> top = MaxHorizontal128(y);

    // Pick one lane from each to form [left, bottom, right, top].
    return MergeRows128(left, bottom, right, top);
}
#endregion
/// <summary>
/// Builds a vector from the first lane of each of the four inputs.
/// </summary>
/// <remarks>
/// Given the following vectors:
/// <code>
/// x: X X X X
/// y: Y Y Y Y
/// z: Z Z Z Z
/// w: W W W W
/// </code>
/// Returns: X Y Z W.
/// Only lane 0 of each input contributes to the result, on both the SSE path and the
/// scalar fallback, so the inputs do not have to be broadcast vectors.
/// </remarks>
/// <param name="x">Vector whose lane 0 becomes lane 0 of the result.</param>
/// <param name="y">Vector whose lane 0 becomes lane 1 of the result.</param>
/// <param name="z">Vector whose lane 0 becomes lane 2 of the result.</param>
/// <param name="w">Vector whose lane 0 becomes lane 3 of the result.</param>
/// <returns>[x0, y0, z0, w0]</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> MergeRows128(
    Vector128<float> x,
    Vector128<float> y,
    Vector128<float> z,
    Vector128<float> w)
{
    if (Sse.IsSupported)
    {
        // xy = [x0, y0, x1, y1]
        var xy = Sse.UnpackLow(x, y);
        // zw = [z0, w0, z1, w1]
        var zw = Sse.UnpackLow(z, w);

        // Shuffle takes result lanes 0-1 from xy and lanes 2-3 from zw. Select lanes 0 and 1
        // of both operands so the result is [x0, y0, z0, w0], matching the fallback below.
        // (Selecting lanes 2 and 3 of zw would yield z1 and w1 instead.)
        return Sse.Shuffle(xy, zw, 0b01_00_01_00);
    }

    return Vector128.Create(
        x.GetElement(0),
        y.GetElement(0),
        z.GetElement(0),
        w.GetElement(0));
}
}
}