mirror of
https://github.com/space-wizards/RobustToolbox.git
synced 2026-02-14 19:29:36 +01:00
* Box Simd * Add 256 bit version of GetAABB * Add AABB benchmarks * No real diff between 128 & 256, so removing 256 | Method | Mean | Error | StdDev | Ratio | |----------- |----------:|----------:|----------:|------:| | GetAABB | 5.8107 ns | 0.0154 ns | 0.0137 ns | 1.00 | | GetAABB128 | 0.4927 ns | 0.0003 ns | 0.0002 ns | 0.08 | | GetAABB256 | 0.4332 ns | 0.0006 ns | 0.0006 ns | 0.07 | * Add Box2Rotated.Transform Benchmark * Results 20% faster and much smaller code. Also I don't think it inlined RotateVec * Add Matrix3x2Helper.TransformBox() benchmark new: | Method | Mean | Error | StdDev | Code Size | |---------- |---------:|----------:|----------:|----------:| | Transform | 2.463 ns | 0.0766 ns | 0.0679 ns | 216 B | old: | Method | Mean | Error | StdDev | Median | Code Size | |---------- |---------:|----------:|----------:|---------:|----------:| | Transform | 9.469 ns | 0.2140 ns | 0.5408 ns | 9.206 ns | 621 B | * Fix polygon constructor * SlimPolygonBenchmark * use new SimdHelper for other methods * Fix bugs * Use new methods * Simd SlimPolygon.ComputeAABB * Move simd transform to physics * Cleanup * Remove unnecessary Unsafe.SkipInit * These tests all work on master * Add Transform.MulSimd test * Add SlimPolygon constructor tests * Add ComputeAABB test --------- Co-authored-by: metalgearsloth <31366439+metalgearsloth@users.noreply.github.com>
160 lines
6.2 KiB
C#
160 lines
6.2 KiB
C#
using System.Runtime.CompilerServices;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
|
|
namespace Robust.Shared.Maths
|
|
{
|
|
/// <summary>
/// Helper stuff for SIMD code.
/// </summary>
internal static class SimdHelpers
{
    /// <summary>
    /// Computes the minimum over the four lanes of <paramref name="v"/>.
    /// </summary>
    /// <param name="v">The vector to reduce.</param>
    /// <returns>The min value is broadcast to the whole vector.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> MinHorizontal128(Vector128<float> v)
    {
        // Swap neighbouring lanes: every lane then holds the min of its pair.
        var pairMin = Vector128.Min(Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2)), v);

        // Swap the two halves: every lane then holds the min of all four.
        return Vector128.Min(Vector128.Shuffle(pairMin, Vector128.Create(2, 3, 0, 1)), pairMin);
    }

    /// <summary>
    /// Computes the maximum over the four lanes of <paramref name="v"/>.
    /// </summary>
    /// <param name="v">The vector to reduce.</param>
    /// <returns>The max value is broadcast to the whole vector.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> MaxHorizontal128(Vector128<float> v)
    {
        // Swap neighbouring lanes: every lane then holds the max of its pair.
        var pairMax = Vector128.Max(Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2)), v);

        // Swap the two halves: every lane then holds the max of all four.
        return Vector128.Max(Vector128.Shuffle(pairMax, Vector128.Create(2, 3, 0, 1)), pairMax);
    }

    /// <summary>
    /// Computes the sum of the four lanes of <paramref name="v"/>.
    /// </summary>
    /// <param name="v">The vector to reduce.</param>
    /// <returns>The added value is broadcast to the whole vector.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> AddHorizontal128(Vector128<float> v)
    {
        // Swap neighbouring lanes: every lane then holds the sum of its pair.
        var pairSum = Vector128.Shuffle(v, Vector128.Create(1, 0, 3, 2)) + v;

        // Swap the two halves: every lane then holds the sum of all four.
        return Vector128.Shuffle(pairSum, Vector128.Create(2, 3, 0, 1)) + pairSum;
    }

    /// <summary>
    /// Computes the sum of the eight lanes of <paramref name="v"/>.
    /// </summary>
    /// <param name="v">The vector to reduce.</param>
    /// <returns>The added value is broadcast to the whole vector.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector256<float> AddHorizontal256(Vector256<float> v)
    {
        // Sum within each pair of neighbouring lanes.
        var pairSum = Vector256.Shuffle(v, Vector256.Create(1, 0, 3, 2, 5, 4, 7, 6)) + v;

        // Sum within each group of four lanes (each 128-bit half).
        var quadSum = Vector256.Shuffle(pairSum, Vector256.Create(2, 3, 0, 1, 6, 7, 4, 5)) + pairSum;

        // Swap the 128-bit halves and add, giving the total in every lane.
        return quadSum + Vector256.Shuffle(quadSum, Vector256.Create(4, 5, 6, 7, 0, 1, 2, 3));
    }

    #region GetAABB

    /// <summary>
    /// This computes the bounding box given a set of 4 coordinates specified via 2 simd vectors.
    /// This effectively computes the horizontal min and max of both of the given vectors.
    /// </summary>
    /// <param name="x">The X components of the 4 coordinates.</param>
    /// <param name="y">The Y components of the 4 coordinates.</param>
    /// <returns>
    /// A vector laid out as [min(x), min(y), max(x), max(y)].
    /// </returns>
    /// <remarks>
    /// Returns a simd vector that can be directly cast to a <see cref="Box2"/>.
    /// Dispatches to <see cref="GetAABBAvx"/> when AVX is available and to
    /// <see cref="GetAABBSlow"/> otherwise.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> GetAABB(Vector128<float> x, Vector128<float> y)
    {
        if (Avx.IsSupported)
            return GetAABBAvx(x, y);

        return GetAABBSlow(x, y);
    }

    /// <summary>
    /// This computes the bounding box given a set of 4 coordinates specified via 2 simd vectors.
    /// This effectively computes the horizontal min and max of both of the given vectors.
    /// </summary>
    /// <param name="x">The X components of the 4 coordinates.</param>
    /// <param name="y">The Y components of the 4 coordinates.</param>
    /// <returns>
    /// A vector laid out as [min(x), min(y), max(x), max(y)].
    /// </returns>
    /// <remarks>
    /// Uses SSE, SSE4.1 and AVX instructions; callers are expected to check
    /// <see cref="Avx.IsSupported"/> first, as <see cref="GetAABB"/> does.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> GetAABBAvx(Vector128<float> x, Vector128<float> y)
    {
        // This can be turned into a 256 bit version that only needs 4 min/max instead of 6
        // But the performance difference seems negligible.

        // x = [x0, x1, x2, x3]
        // y = [y0, y1, y2, y3]

        // The pair-swapped inputs feed both the min and the max reduction, so build them once.
        var xSwapped = Vector128.Shuffle(x, Vector128.Create(1, 0, 3, 2));
        // xSwapped = [x1, x0, x3, x2]
        var ySwapped = Vector128.Shuffle(y, Vector128.Create(1, 0, 3, 2));
        // ySwapped = [y1, y0, y3, y2]

        var xmin = Sse.Min(xSwapped, x);
        // xmin = [min(x0,x1), min(x0,x1), min(x2,x3), min(x2,x3)]

        var ymin = Sse.Min(ySwapped, y);
        // ymin = [min(y0,y1), min(y0,y1), min(y2,y3), min(y2,y3)]

        var xymin = Sse41.Blend(xmin, ymin, 0b_1_0_1_0);
        // xymin = [min(x0,x1), min(y0,y1), min(x2,x3), min(y2,y3)]

        var xyminPermuted = Avx.Permute(xymin, 0b_00_00_11_10);
        // xyminPermuted = [min(x2,x3), min(y2,y3), ..., ... ]

        var min = Sse.Min(xymin, xyminPermuted);
        // min = [min(x0,x1,x2,x3), min(y0,y1,y2,y3), ..., ... ]

        var xmax = Sse.Max(xSwapped, x);
        // xmax = [max(x0,x1), max(x0,x1), max(x2,x3), max(x2,x3)]

        var ymax = Sse.Max(ySwapped, y);
        // ymax = [max(y0,y1), max(y0,y1), max(y2,y3), max(y2,y3)]

        var xymax = Sse41.Blend(xmax, ymax, 0b_1_0_1_0);
        // xymax = [max(x0,x1), max(y0,y1), max(x2,x3), max(y2,y3)]

        var xymaxPermuted = Avx.Permute(xymax, 0b_01_00_00_00);
        // xymaxPermuted = [.., .., max(x0,x1), max(y0,y1) ]

        var max = Sse.Max(xymax, xymaxPermuted);
        // max = [.., .., max(x0,x1,x2,x3), max(y0,y1,y2,y3) ]

        // result = [min(x0,x1,x2,x3), min(y0,y1,y2,y3), max(x0,x1,x2,x3), max(y0,y1,y2,y3) ]
        return Sse41.Blend(min, max, 0b_1_1_0_0);
    }

    /// <summary>
    /// Computes the same bounding box as <see cref="GetAABB"/> using the
    /// horizontal min/max helpers of this class.
    /// </summary>
    /// <param name="x">The X components of the 4 coordinates.</param>
    /// <param name="y">The Y components of the 4 coordinates.</param>
    /// <returns>
    /// A vector laid out as [min(x), min(y), max(x), max(y)].
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> GetAABBSlow(Vector128<float> x, Vector128<float> y)
    {
        var left = MinHorizontal128(x);
        var bottom = MinHorizontal128(y);
        var right = MaxHorizontal128(x);
        var top = MaxHorizontal128(y);

        return MergeRows128(left, bottom, right, top);
    }

    #endregion

    /// <summary>
    /// Builds a vector from the first lane of each of the four inputs.
    /// </summary>
    /// <param name="x">Source of lane 0 of the result.</param>
    /// <param name="y">Source of lane 1 of the result.</param>
    /// <param name="z">Source of lane 2 of the result.</param>
    /// <param name="w">Source of lane 3 of the result.</param>
    /// <returns>[x0, y0, z0, w0]</returns>
    /// <remarks>
    /// Given the following vectors:
    /// x: X X X X
    /// y: Y Y Y Y
    /// z: Z Z Z Z
    /// w: W W W W
    /// Returns: X Y Z W
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<float> MergeRows128(
        Vector128<float> x,
        Vector128<float> y,
        Vector128<float> z,
        Vector128<float> w)
    {
        if (!Sse.IsSupported)
        {
            return Vector128.Create(
                x.GetElement(0),
                y.GetElement(0),
                z.GetElement(0),
                w.GetElement(0));
        }

        var xy = Sse.UnpackLow(x, y);
        // xy = [x0, y0, x1, y1]
        var zw = Sse.UnpackLow(z, w);
        // zw = [z0, w0, z1, w1]

        // Take lanes 0 and 1 from both operands so the result is [x0, y0, z0, w0],
        // matching the scalar fallback above even when the inputs are not broadcast.
        return Sse.Shuffle(xy, zw, 0b01_00_01_00);
    }
}
|
|
}
|