using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
namespace Robust.Shared.Maths
{
public static unsafe partial class NumericsHelpers
{
    #region Add

    /// <summary>
    ///     Adds b to a and stores the result in a.
    /// </summary>
    /// <param name="a">Left operand; overwritten element-wise with <c>a[i] + b[i]</c>.</param>
    /// <param name="b">Right operand.</param>
    /// <exception cref="ArgumentException">The spans do not have the same length.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void Add(Span<float> a, ReadOnlySpan<float> b)
    {
        // In-place form: the destination aliases the first operand. The kernels
        // below load both operands for an index before storing to that index.
        Add(a, b, a);
    }

    /// <summary>
    ///     Adds b to a and stores the result in s.
    /// </summary>
    /// <param name="a">Left operand.</param>
    /// <param name="b">Right operand.</param>
    /// <param name="s">Destination; receives <c>a[i] + b[i]</c> for every index.</param>
    /// <exception cref="ArgumentException">The three spans do not all have the same length.</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void Add(ReadOnlySpan<float> a, ReadOnlySpan<float> b, Span<float> s)
    {
        if (a.Length != b.Length || a.Length != s.Length)
            throw new ArgumentException("Length of arrays must be the same!");

        // Take the 256-bit kernel only when the (externally defined) switch and
        // length predicate both allow it; otherwise use the 128-bit kernel.
        if (Vector256Enabled && LengthValid256Single(a.Length))
        {
            Add256(a, b, s);
            return;
        }

        Add128(a, b, s);
    }

    /// <summary>
    ///     Element-wise scalar loop computing <c>s[i] = a[i] + b[i]</c> over the
    ///     half-open index range [<paramref name="start"/>, <paramref name="end"/>).
    /// </summary>
    /// <param name="a">Left operand.</param>
    /// <param name="b">Right operand.</param>
    /// <param name="s">Destination.</param>
    /// <param name="start">First index to process (inclusive).</param>
    /// <param name="end">Index at which to stop (exclusive).</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void AddScalar(ReadOnlySpan<float> a, ReadOnlySpan<float> b, Span<float> s, int start, int end)
    {
        for (var i = start; i < end; i++)
        {
            s[i] = a[i] + b[i];
        }
    }

    /// <summary>
    ///     Adds b to a into s using 128-bit vectors, then finishes the leftover
    ///     tail elements with <see cref="AddScalar(ReadOnlySpan{float}, ReadOnlySpan{float}, Span{float}, int, int)"/>.
    /// </summary>
    /// <param name="a">Left operand; its length drives the loop bounds.</param>
    /// <param name="b">Right operand; not length-checked here.</param>
    /// <param name="s">Destination; not length-checked here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Add128(ReadOnlySpan<float> a, ReadOnlySpan<float> b, Span<float> s)
    {
        // The lane count is a power of two, so masking with (Count - 1) is the
        // same as taking the length modulo the lane count.
        var remainder = a.Length & (Vector128<float>.Count - 1);
        var length = a.Length - remainder;

        fixed (float* ptr = a)
        fixed (float* ptrB = b)
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += Vector128<float>.Count)
            {
                var j = Vector128.Load(ptr + i);
                var k = Vector128.Load(ptrB + i);

                Vector128.Add(j, k).Store(ptrS + i);
            }
        }

        if (remainder != 0)
        {
            AddScalar(a, b, s, length, a.Length);
        }
    }

    /// <summary>
    ///     Adds b to a into s using 256-bit vectors, then finishes the leftover
    ///     tail elements with <see cref="AddScalar(ReadOnlySpan{float}, ReadOnlySpan{float}, Span{float}, int, int)"/>.
    /// </summary>
    /// <param name="a">Left operand; its length drives the loop bounds.</param>
    /// <param name="b">Right operand; not length-checked here.</param>
    /// <param name="s">Destination; not length-checked here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Add256(ReadOnlySpan<float> a, ReadOnlySpan<float> b, Span<float> s)
    {
        // Same power-of-two masking trick as the 128-bit kernel.
        var remainder = a.Length & (Vector256<float>.Count - 1);
        var length = a.Length - remainder;

        fixed (float* ptr = a)
        fixed (float* ptrB = b)
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += Vector256<float>.Count)
            {
                var j = Vector256.Load(ptr + i);
                var k = Vector256.Load(ptrB + i);

                Vector256.Add(j, k).Store(ptrS + i);
            }
        }

        if (remainder != 0)
        {
            AddScalar(a, b, s, length, a.Length);
        }
    }

    #endregion

    #region AddByScalar

    /// <summary>
    ///     Adds scalar b to a and stores the result in a.
    /// </summary>
    /// <param name="a">Values to offset; overwritten element-wise with <c>a[i] + b</c>.</param>
    /// <param name="b">Scalar added to every element.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void Add(Span<float> a, float b)
    {
        // In-place form: the destination aliases the source span.
        Add(a, b, a);
    }

    /// <summary>
    ///     Adds scalar b to a and stores the result in s.
    /// </summary>
    /// <param name="a">Source values.</param>
    /// <param name="b">Scalar added to every element.</param>
    /// <param name="s">Destination; receives <c>a[i] + b</c> for every index.</param>
    /// <exception cref="ArgumentException">
    ///     <paramref name="a"/> and <paramref name="s"/> do not have the same length.
    /// </exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void Add(ReadOnlySpan<float> a, float b, Span<float> s)
    {
        if (a.Length != s.Length)
            throw new ArgumentException("Length of arrays must be the same!");

        // Same dispatch rule as the span + span overload.
        if (Vector256Enabled && LengthValid256Single(a.Length))
        {
            Add256(a, b, s);
            return;
        }

        Add128(a, b, s);
    }

    /// <summary>
    ///     Element-wise scalar loop computing <c>s[i] = a[i] + b</c> over the
    ///     half-open index range [<paramref name="start"/>, <paramref name="end"/>).
    /// </summary>
    /// <param name="a">Source values.</param>
    /// <param name="b">Scalar added to every element.</param>
    /// <param name="s">Destination.</param>
    /// <param name="start">First index to process (inclusive).</param>
    /// <param name="end">Index at which to stop (exclusive).</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void AddScalar(ReadOnlySpan<float> a, float b, Span<float> s, int start, int end)
    {
        for (var i = start; i < end; i++)
        {
            s[i] = a[i] + b;
        }
    }

    /// <summary>
    ///     Adds scalar b to a into s using 128-bit vectors, then finishes the leftover
    ///     tail elements with <see cref="AddScalar(ReadOnlySpan{float}, float, Span{float}, int, int)"/>.
    /// </summary>
    /// <param name="a">Source values; its length drives the loop bounds.</param>
    /// <param name="b">Scalar added to every element.</param>
    /// <param name="s">Destination; not length-checked here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Add128(ReadOnlySpan<float> a, float b, Span<float> s)
    {
        var remainder = a.Length & (Vector128<float>.Count - 1);
        var length = a.Length - remainder;

        // Broadcast the scalar into every lane once, outside the loop.
        var scalar = Vector128.Create(b);

        fixed (float* ptr = a)
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += Vector128<float>.Count)
            {
                var j = Vector128.Load(ptr + i);

                Vector128.Add(j, scalar).Store(ptrS + i);
            }
        }

        if (remainder != 0)
        {
            AddScalar(a, b, s, length, a.Length);
        }
    }

    /// <summary>
    ///     Adds scalar b to a into s using 256-bit vectors, then finishes the leftover
    ///     tail elements with <see cref="AddScalar(ReadOnlySpan{float}, float, Span{float}, int, int)"/>.
    /// </summary>
    /// <param name="a">Source values; its length drives the loop bounds.</param>
    /// <param name="b">Scalar added to every element.</param>
    /// <param name="s">Destination; not length-checked here.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static void Add256(ReadOnlySpan<float> a, float b, Span<float> s)
    {
        var remainder = a.Length & (Vector256<float>.Count - 1);
        var length = a.Length - remainder;

        // Broadcast the scalar into every lane once, outside the loop.
        var scalar = Vector256.Create(b);

        fixed (float* ptr = a)
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += Vector256<float>.Count)
            {
                var j = Vector256.Load(ptr + i);

                Vector256.Add(j, scalar).Store(ptrS + i);
            }
        }

        if (remainder != 0)
        {
            AddScalar(a, b, s, length, a.Length);
        }
    }

    #endregion

    #region HorizontalAdd

    /// <summary>
    ///     Adds all elements of a and returns the value.
    /// </summary>
    /// <param name="a">Values to sum.</param>
    /// <returns>The sum of every element of <paramref name="a"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static float HorizontalAdd(ReadOnlySpan<float> a)
    {
        if (Vector256Enabled && LengthValid256Single(a.Length))
        {
            return HorizontalAdd256(a);
        }

        return HorizontalAdd128(a);
    }

    /// <summary>
    ///     Sequentially sums the elements of <paramref name="a"/> in the half-open
    ///     index range [<paramref name="start"/>, <paramref name="end"/>), starting from zero.
    /// </summary>
    /// <param name="a">Values to sum.</param>
    /// <param name="start">First index to include (inclusive).</param>
    /// <param name="end">Index at which to stop (exclusive).</param>
    /// <returns>The sum of the selected elements; <c>0</c> when the range is empty.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static float HorizontalAddScalar(ReadOnlySpan<float> a, int start, int end)
    {
        var sum = 0f;

        for (var i = start; i < end; i++)
        {
            sum += a[i];
        }

        return sum;
    }

    /// <summary>
    ///     Sums the elements of <paramref name="a"/> by accumulating 128-bit vectors
    ///     lane-wise, reducing the accumulator, and adding the leftover tail via
    ///     <see cref="HorizontalAddScalar"/>.
    /// </summary>
    /// <param name="a">Values to sum.</param>
    /// <returns>The sum of every element of <paramref name="a"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static float HorizontalAdd128(ReadOnlySpan<float> a)
    {
        // Use the lane count rather than literal 3 / 4 so this kernel matches the
        // Add128 kernels; Vector128<float>.Count is 4, so the values are unchanged.
        var remainder = a.Length & (Vector128<float>.Count - 1);
        var length = a.Length - remainder;

        var accumulator = Vector128.Create(0f);

        fixed (float* ptr = a)
        {
            for (var i = 0; i < length; i += Vector128<float>.Count)
            {
                var j = Vector128.Load(ptr + i);

                accumulator = Vector128.Add(accumulator, j);
            }
        }

        // NOTE(review): SimdHelpers.AddHorizontal128 is defined elsewhere; presumably
        // it folds the lanes so that element 0 holds the total - confirm there.
        var sum = SimdHelpers.AddHorizontal128(accumulator).GetElement(0);

        if (remainder != 0)
        {
            sum += HorizontalAddScalar(a, length, a.Length);
        }

        return sum;
    }

    /// <summary>
    ///     Sums the elements of <paramref name="a"/> by accumulating 256-bit vectors
    ///     lane-wise, reducing the accumulator, and adding the leftover tail via
    ///     <see cref="HorizontalAddScalar"/>.
    /// </summary>
    /// <param name="a">Values to sum.</param>
    /// <returns>The sum of every element of <paramref name="a"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static float HorizontalAdd256(ReadOnlySpan<float> a)
    {
        // Use the lane count rather than literal 7 / 8 so this kernel matches the
        // Add256 kernels; Vector256<float>.Count is 8, so the values are unchanged.
        var remainder = a.Length & (Vector256<float>.Count - 1);
        var length = a.Length - remainder;

        var accumulator = Vector256.Create(0f);

        fixed (float* ptr = a)
        {
            for (var i = 0; i < length; i += Vector256<float>.Count)
            {
                var j = Vector256.Load(ptr + i);

                accumulator = Vector256.Add(j, accumulator);
            }
        }

        // NOTE(review): SimdHelpers.AddHorizontal256 is defined elsewhere; presumably
        // it folds the lanes so that element 0 holds the total - confirm there.
        var sum = SimdHelpers.AddHorizontal256(accumulator).GetElement(0);

        if (remainder != 0)
        {
            sum += HorizontalAddScalar(a, length, a.Length);
        }

        return sum;
    }

    #endregion
}
}