168 lines
5.3 KiB
C#
168 lines
5.3 KiB
C#
using System.Numerics;
|
|
using System.Runtime.CompilerServices;
|
|
using System.Runtime.Intrinsics;
|
|
using System.Runtime.Intrinsics.X86;
|
|
|
|
namespace PersistentOrderedMap;
|
|
|
|
/// <summary>
/// Forward scans over a span of 32-bit integers that locate the first element
/// satisfying a comparison against a target, using AVX2 / AVX-512 when available
/// and a scalar loop otherwise.
/// </summary>
public static class IntScanner
{
    /// <summary>Number of 32-bit integers held by one 256-bit vector.</summary>
    private const int Avx2Lanes = 8;

    /// <summary>Number of 32-bit integers held by one 512-bit vector.</summary>
    private const int Avx512Lanes = 16;

    /// <summary>Bytes per 32-bit lane; converts a byte-mask bit position to a lane index.</summary>
    private const int BytesPerLane = sizeof(int);

    /// <summary>
    /// Returns the index of the first element in <paramref name="keys"/> that is
    /// greater than or equal to <paramref name="target"/>.
    /// </summary>
    /// <param name="keys">The elements to scan, front to back.</param>
    /// <param name="target">The value each element is compared against.</param>
    /// <returns>
    /// The zero-based index of the first element with <c>keys[i] &gt;= target</c>,
    /// or <c>keys.Length</c> when no element qualifies.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreaterOrEqual(ReadOnlySpan<int> keys, int target)
    {
        // Scalar fallback for unsupported hardware or inputs shorter than one AVX2 vector.
        if (!Avx2.IsSupported || keys.Length < Avx2Lanes)
        {
            return LinearScan(keys, target);
        }

        return Avx512F.IsSupported
            ? ScanAvx512(keys, target)
            : ScanAvx2(keys, target);
    }

    /// <summary>
    /// Scalar scan: index of the first element <c>&gt;= target</c>, or <c>keys.Length</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScan(ReadOnlySpan<int> keys, int target)
    {
        for (var i = 0; i < keys.Length; i++)
        {
            if (keys[i] >= target)
            {
                return i;
            }
        }

        return keys.Length;
    }

    /// <summary>
    /// AVX2 scan (8 integers per step): index of the first element <c>&gt;= target</c>,
    /// or <c>keys.Length</c>. Any tail shorter than one vector is finished by
    /// <see cref="LinearScan"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx2(ReadOnlySpan<int> keys, int target)
    {
        // AVX2 lacks a native GreaterOrEqual for 32-bit integers. Instead of comparing
        // against (target - 1), which wraps to int.MaxValue when target is int.MinValue
        // and then never matches, compute "target > data" and invert the mask:
        // data >= target  <=>  !(target > data).
        var vTarget = Vector256.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than on every iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx2Lanes; i += Avx2Lanes)
            {
                var vData = Avx2.LoadVector256(ptr + i);
                var vLess = Avx2.CompareGreaterThan(vTarget, vData);

                // MoveMask builds a 32-bit integer from the most significant bit of each byte,
                // so every lane contributes 4 identical bits. After inversion, set bits mark
                // lanes where data >= target.
                var mask = ~(uint)Avx2.MoveMask(vLess.AsByte());

                if (mask != 0)
                {
                    // Dividing the trailing zero count by 4 maps the byte offset back to the lane index.
                    return i + (BitOperations.TrailingZeroCount(mask) / BytesPerLane);
                }
            }
        }

        return LinearScan(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// AVX-512 scan (16 integers per step): index of the first element <c>&gt;= target</c>,
    /// or <c>keys.Length</c>. Any tail shorter than one vector is finished by
    /// <see cref="LinearScan"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx512(ReadOnlySpan<int> keys, int target)
    {
        var vTarget = Vector512.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than on every iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx512Lanes; i += Avx512Lanes)
            {
                var vData = Avx512F.LoadVector512(ptr + i);

                // One bit per lane: set where data >= target.
                var bits = Vector512.GreaterThanOrEqual(vData, vTarget).ExtractMostSignificantBits();

                if (bits != 0)
                {
                    return i + BitOperations.TrailingZeroCount(bits);
                }
            }
        }

        return LinearScan(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// Returns the index of the first element in <paramref name="keys"/> that is
    /// strictly greater than <paramref name="target"/>.
    /// </summary>
    /// <param name="keys">The elements to scan, front to back.</param>
    /// <param name="target">The value each element is compared against.</param>
    /// <returns>
    /// The zero-based index of the first element with <c>keys[i] &gt; target</c>,
    /// or <c>keys.Length</c> when no element qualifies.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreater(ReadOnlySpan<int> keys, int target)
    {
        // Scalar fallback for unsupported hardware or inputs shorter than one AVX2 vector.
        if (!Avx2.IsSupported || keys.Length < Avx2Lanes)
        {
            return LinearScanGreater(keys, target);
        }

        return Avx512F.IsSupported
            ? ScanAvx512Greater(keys, target)
            : ScanAvx2Greater(keys, target);
    }

    /// <summary>
    /// Scalar scan: index of the first element <c>&gt; target</c>, or <c>keys.Length</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScanGreater(ReadOnlySpan<int> keys, int target)
    {
        for (var i = 0; i < keys.Length; i++)
        {
            if (keys[i] > target)
            {
                return i;
            }
        }

        return keys.Length;
    }

    /// <summary>
    /// AVX2 scan (8 integers per step): index of the first element <c>&gt; target</c>,
    /// or <c>keys.Length</c>. Any tail shorter than one vector is finished by
    /// <see cref="LinearScanGreater"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx2Greater(ReadOnlySpan<int> keys, int target)
    {
        // For > target, AVX2 CompareGreaterThan applies directly.
        var vTarget = Vector256.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than on every iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx2Lanes; i += Avx2Lanes)
            {
                var vData = Avx2.LoadVector256(ptr + i);
                var vGreater = Avx2.CompareGreaterThan(vData, vTarget);

                // 4 mask bits per matching 32-bit lane (one per byte).
                var mask = (uint)Avx2.MoveMask(vGreater.AsByte());

                if (mask != 0)
                {
                    return i + (BitOperations.TrailingZeroCount(mask) / BytesPerLane);
                }
            }
        }

        return LinearScanGreater(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// AVX-512 scan (16 integers per step): index of the first element <c>&gt; target</c>,
    /// or <c>keys.Length</c>. Any tail shorter than one vector is finished by
    /// <see cref="LinearScanGreater"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx512Greater(ReadOnlySpan<int> keys, int target)
    {
        var vTarget = Vector512.Create(target);
        var len = keys.Length;
        var i = 0;

        // Pin once for the whole loop rather than on every iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - Avx512Lanes; i += Avx512Lanes)
            {
                var vData = Avx512F.LoadVector512(ptr + i);

                // One bit per lane: set where data > target.
                var bits = Vector512.GreaterThan(vData, vTarget).ExtractMostSignificantBits();

                if (bits != 0)
                {
                    return i + BitOperations.TrailingZeroCount(bits);
                }
            }
        }

        return LinearScanGreater(keys.Slice(i), target) + i;
    }
}
|
|
|
|
|
|
|
|
|