// PersistentMap/PersistentOrderedMap/KeyStrategies/IntScanner.cs
//
// Listing metadata: 168 lines, 5.3 KiB, C#

using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace PersistentOrderedMap;
/// <summary>
/// Front-to-back scans over a span of <see cref="int"/> keys that locate the first element
/// satisfying a comparison against a target, using AVX2 / AVX-512 when the hardware supports it
/// and a scalar loop otherwise.
/// </summary>
/// <remarks>
/// Every search returns the lowest index whose element satisfies the predicate, or
/// <c>keys.Length</c> when no element does. The scalar and vectorized paths return the same result.
/// </remarks>
public static class IntScanner
{
    // Number of 32-bit lanes in a 256-bit and a 512-bit vector respectively.
    private const int Avx2Lanes = 8;
    private const int Avx512Lanes = 16;

    /// <summary>
    /// Finds the index of the first element that is greater than or equal to <paramref name="target"/>.
    /// </summary>
    /// <param name="keys">The keys to scan, in index order.</param>
    /// <param name="target">The value each key is compared against.</param>
    /// <returns>The index of the first matching key, or <c>keys.Length</c> if none matches.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreaterOrEqual(ReadOnlySpan<int> keys, int target)
    {
        // Scalar fallback for unsupported hardware or spans shorter than one AVX2 block.
        if (!Avx2.IsSupported || keys.Length < Avx2Lanes)
        {
            return LinearScan(keys, target);
        }

        // Take the 512-bit path only when at least one full 16-lane block exists;
        // otherwise its vector loop would never run and the whole span would be scanned scalar.
        return Avx512F.IsSupported && keys.Length >= Avx512Lanes
            ? ScanAvx512(keys, target)
            : ScanAvx2(keys, target);
    }

    /// <summary>Scalar scan for the first key greater than or equal to <paramref name="target"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScan(ReadOnlySpan<int> keys, int target)
    {
        for (var i = 0; i < keys.Length; i++)
        {
            if (keys[i] >= target)
            {
                return i;
            }
        }

        return keys.Length;
    }

    /// <summary>
    /// AVX2 scan (8 keys per iteration) for the first key greater than or equal to
    /// <paramref name="target"/>; the remaining tail is handled by <see cref="LinearScan"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx2(ReadOnlySpan<int> keys, int target)
    {
        // AVX2 only offers a signed "greater than" compare for 32-bit lanes.
        // Rewriting (x >= t) as (x > t - 1) wraps around when t == int.MinValue and
        // would then match nothing, so instead compute (t > x) and invert the mask.
        var vTarget = Vector256.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx2Lanes; i += Avx2Lanes)
            {
                var vData = Avx2.LoadVector256(ptr + i);
                var vLess = Avx2.CompareGreaterThan(vTarget, vData);

                // MoveMask takes the top bit of each byte, i.e. 4 identical bits per int lane.
                // After inversion a set bit marks a lane whose key is >= target.
                var mask = ~(uint)Avx2.MoveMask(vLess.AsByte());
                if (mask != 0)
                {
                    // Dividing the bit position by 4 maps it back to the lane index.
                    return i + (BitOperations.TrailingZeroCount(mask) / 4);
                }
            }
        }

        return LinearScan(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// AVX-512 scan (16 keys per iteration) for the first key greater than or equal to
    /// <paramref name="target"/>; the remaining tail is handled by <see cref="LinearScan"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx512(ReadOnlySpan<int> keys, int target)
    {
        var vTarget = Vector512.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx512Lanes; i += Avx512Lanes)
            {
                var vData = Avx512F.LoadVector512(ptr + i);

                // The cross-platform Vector512 API provides GreaterThanOrEqual directly.
                var matches = Vector512.GreaterThanOrEqual(vData, vTarget);
                if (matches != Vector512<int>.Zero)
                {
                    // One bit per lane, so the trailing zero count is the lane index.
                    var bits = (uint)matches.ExtractMostSignificantBits();
                    return i + BitOperations.TrailingZeroCount(bits);
                }
            }
        }

        return LinearScan(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// Finds the index of the first element that is strictly greater than <paramref name="target"/>.
    /// </summary>
    /// <param name="keys">The keys to scan, in index order.</param>
    /// <param name="target">The value each key is compared against.</param>
    /// <returns>The index of the first matching key, or <c>keys.Length</c> if none matches.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreater(ReadOnlySpan<int> keys, int target)
    {
        // Scalar fallback for unsupported hardware or spans shorter than one AVX2 block.
        if (!Avx2.IsSupported || keys.Length < Avx2Lanes)
        {
            return LinearScanGreater(keys, target);
        }

        // Same dispatch rule as FindFirstGreaterOrEqual: 512-bit only with a full 16-lane block.
        return Avx512F.IsSupported && keys.Length >= Avx512Lanes
            ? ScanAvx512Greater(keys, target)
            : ScanAvx2Greater(keys, target);
    }

    /// <summary>Scalar scan for the first key strictly greater than <paramref name="target"/>.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScanGreater(ReadOnlySpan<int> keys, int target)
    {
        for (var i = 0; i < keys.Length; i++)
        {
            if (keys[i] > target)
            {
                return i;
            }
        }

        return keys.Length;
    }

    /// <summary>
    /// AVX2 scan (8 keys per iteration) for the first key strictly greater than
    /// <paramref name="target"/>; the remaining tail is handled by <see cref="LinearScanGreater"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx2Greater(ReadOnlySpan<int> keys, int target)
    {
        // Strict "greater than" maps directly onto the AVX2 compare; no adjustment of target is needed.
        var vTarget = Vector256.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx2Lanes; i += Avx2Lanes)
            {
                var vData = Avx2.LoadVector256(ptr + i);
                var vGreater = Avx2.CompareGreaterThan(vData, vTarget);

                // 4 mask bits per int lane; divide the bit position by 4 to get the lane index.
                var mask = (uint)Avx2.MoveMask(vGreater.AsByte());
                if (mask != 0)
                {
                    return i + (BitOperations.TrailingZeroCount(mask) / 4);
                }
            }
        }

        return LinearScanGreater(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// AVX-512 scan (16 keys per iteration) for the first key strictly greater than
    /// <paramref name="target"/>; the remaining tail is handled by <see cref="LinearScanGreater"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx512Greater(ReadOnlySpan<int> keys, int target)
    {
        var vTarget = Vector512.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx512Lanes; i += Avx512Lanes)
            {
                var vData = Avx512F.LoadVector512(ptr + i);
                var matches = Vector512.GreaterThan(vData, vTarget);
                if (matches != Vector512<int>.Zero)
                {
                    // One bit per lane, so the trailing zero count is the lane index.
                    var bits = (uint)matches.ExtractMostSignificantBits();
                    return i + BitOperations.TrailingZeroCount(bits);
                }
            }
        }

        return LinearScanGreater(keys.Slice(i), target) + i;
    }
}