Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 23 additions & 65 deletions src/libraries/System.Collections/src/System/Collections/BitArray.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics.Arm;
using System.Numerics;

namespace System.Collections
{
Expand Down Expand Up @@ -119,9 +120,7 @@ public BitArray(byte[] bytes)
}

private const uint Vector128ByteCount = 16;
private const uint Vector128IntCount = 4;
private const uint Vector256ByteCount = 32;
private const uint Vector256IntCount = 8;
public unsafe BitArray(bool[] values)
{
ArgumentNullException.ThrowIfNull(values);
Expand Down Expand Up @@ -338,25 +337,17 @@ public unsafe BitArray And(BitArray value)
case 0: goto Done;
}

uint i = 0;

ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
int i = 0;

if (Vector256.IsHardwareAccelerated)
if (Vector.IsHardwareAccelerated)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using Vector<T> is likely a non-starter right now.

Not only can it not be used with R2R (crossgen) code today, since it is variable-sized and therefore forces the code to be jitted, but it is also missing certain APIs that are available on Vector128/256<T>. A few of these functions, like ExtractMostSignificantBits, can't be exposed on Vector<T>; others, like LoadUnsafe, could be but aren't today.

There is a design being considered (dotnet/designs#268) where we can extend Vector<T> to better work with such scenarios and better enable its usage in other scenarios, but that isn't available at the moment and it's not 100% clear what shape it will end up taking.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this case in particular, Vector<T> would likely be similar in perf if we had LoadUnsafe and StoreUnsafe APIs available.

However, it would still come with the restriction that it could not participate in R2R (and therefore forces jitting on first use) and it may regress scenarios where the backing data is typically "small".

For example, Vector<T> on most modern x64 hardware is equivalent to only having the Vector256<T> path. On Arm64, it's equivalent to only having the Vector128<T> path. For x64, this means that inputs smaller than 32 bytes (and potentially 64 bytes in the future) will behave "worse" than the equivalent on Arm64, as they'll execute as "scalar" rather than as "vector". Likewise, depending on data layout, alignment, and processor, it may behave "worse" for inputs up to ~256 bytes as well.

As indicated, these are scenarios that are being looked at and considered, but it's not something that we can easily do today.

{
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
fixed (int* pThisBuffer = thisArray)
fixed (int* pValueBuffer = valueArray)
{
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) & Vector256.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
}
}
else if (Vector128.IsHardwareAccelerated)
{
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
{
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) & Vector128.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
for (; i <= (uint)count - Vector<int>.Count; i += Vector<int>.Count)
{
Unsafe.WriteUnaligned(pThisBuffer + i, Unsafe.ReadUnaligned<Vector<int>>(pThisBuffer + i) & Unsafe.ReadUnaligned<Vector<int>>(pValueBuffer + i));
}
}
}

Expand Down Expand Up @@ -404,25 +395,14 @@ public unsafe BitArray Or(BitArray value)
case 0: goto Done;
}

uint i = 0;

ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
int i = 0;

if (Vector256.IsHardwareAccelerated)
{
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
{
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) | Vector256.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
}
}
else if (Vector128.IsHardwareAccelerated)
fixed (int* pThisBuffer = thisArray)
fixed (int* pValueBuffer = valueArray)
{
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
for (; i <= (uint)count - Vector<int>.Count; i += Vector<int>.Count)
{
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) | Vector128.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
Unsafe.WriteUnaligned(pThisBuffer + i, Unsafe.ReadUnaligned<Vector<int>>(pThisBuffer + i) | Unsafe.ReadUnaligned<Vector<int>>(pValueBuffer + i));
}
}

Expand Down Expand Up @@ -470,25 +450,14 @@ public unsafe BitArray Xor(BitArray value)
case 0: goto Done;
}

uint i = 0;

ref int left = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
ref int right = ref MemoryMarshal.GetArrayDataReference<int>(valueArray);
int i = 0;

if (Vector256.IsHardwareAccelerated)
{
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
{
Vector256<int> result = Vector256.LoadUnsafe(ref left, i) ^ Vector256.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
}
}
else if (Vector128.IsHardwareAccelerated)
fixed (int* pThisBuffer = thisArray)
fixed (int* pValueBuffer = valueArray)
{
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
for (; i <= (uint)count - Vector<int>.Count; i += Vector<int>.Count)
{
Vector128<int> result = Vector128.LoadUnsafe(ref left, i) ^ Vector128.LoadUnsafe(ref right, i);
result.StoreUnsafe(ref left, i);
Unsafe.WriteUnaligned(pThisBuffer + i, Unsafe.ReadUnaligned<Vector<int>>(pThisBuffer + i) ^ Unsafe.ReadUnaligned<Vector<int>>(pValueBuffer + i));
}
}

Expand Down Expand Up @@ -529,24 +498,13 @@ public unsafe BitArray Not()
case 0: goto Done;
}

uint i = 0;

ref int value = ref MemoryMarshal.GetArrayDataReference<int>(thisArray);
int i = 0;

if (Vector256.IsHardwareAccelerated)
{
for (; i < (uint)count - (Vector256IntCount - 1u); i += Vector256IntCount)
{
Vector256<int> result = ~Vector256.LoadUnsafe(ref value, i);
result.StoreUnsafe(ref value, i);
}
}
else if (Vector128.IsHardwareAccelerated)
fixed (int* pThisBuffer = thisArray)
{
for (; i < (uint)count - (Vector128IntCount - 1u); i += Vector128IntCount)
for (; i <= (uint)count - Vector<int>.Count; i += Vector<int>.Count)
{
Vector128<int> result = ~Vector128.LoadUnsafe(ref value, i);
result.StoreUnsafe(ref value, i);
Unsafe.WriteUnaligned(pThisBuffer + i, ~Unsafe.ReadUnaligned<Vector<int>>(pThisBuffer + i));
}
}

Expand Down