added proper benchmarking against others

changed StandardStrategy to use binary search.

and so on
This commit is contained in:
Linus Björnstam 2026-04-22 19:30:46 +02:00
parent 9242c1c751
commit c7c5c7b81b
9 changed files with 648 additions and 627 deletions

View file

@ -0,0 +1,90 @@
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace PersistentMap;
/// <summary>
/// Scans a span of 32-bit integers for the first element that is greater than or
/// equal to a target value, using AVX-512 or AVX2 when the hardware supports them.
/// </summary>
public static class IntScanner
{
    /// <summary>
    /// Returns the index of the first element in <paramref name="keys"/> that is
    /// greater than or equal to <paramref name="target"/>.
    /// </summary>
    /// <param name="keys">The values to scan, front to back.</param>
    /// <param name="target">The inclusive lower bound to look for.</param>
    /// <returns>
    /// The zero-based index of the first element <c>&gt;= target</c>, or
    /// <c>keys.Length</c> when no element qualifies (including an empty span).
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreaterOrEqual(ReadOnlySpan<int> keys, int target)
    {
        // Fallback for short spans or unsupported hardware.
        // The narrowest SIMD path (AVX2) needs at least 8 integers to fill one vector.
        if (!Avx2.IsSupported || keys.Length < 8)
        {
            return LinearScan(keys, target);
        }

        return Avx512F.IsSupported
            ? ScanAvx512(keys, target)
            : ScanAvx2(keys, target);
    }

    /// <summary>
    /// Scalar reference implementation: walks the span and returns the first index
    /// whose value is <c>&gt;= target</c>, or <c>keys.Length</c> if there is none.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScan(ReadOnlySpan<int> keys, int target)
    {
        for (var i = 0; i < keys.Length; i++)
        {
            if (keys[i] >= target)
            {
                return i;
            }
        }

        return keys.Length;
    }

    /// <summary>
    /// AVX2 implementation: examines 8 integers per iteration and finishes any
    /// remaining elements (fewer than 8) with <see cref="LinearScan"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx2(ReadOnlySpan<int> keys, int target)
    {
        // AVX2 only offers a signed "greater than" for 32-bit integers.
        // Instead of testing data > target - 1 (which wraps around when
        // target == int.MinValue and then never matches), we test target > data
        // and invert the result: a lane that is NOT "less than target" is >= target.
        var vTarget = Vector256.Create(target);
        var i = 0;
        var len = keys.Length;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - 8; i += 8)
            {
                var vData = Avx2.LoadVector256(ptr + i);
                var vLess = Avx2.CompareGreaterThan(vTarget, vData);

                // MoveMask packs the most significant bit of each of the 32 bytes into
                // an int. After inversion, a set bit marks a byte of a lane >= target.
                var mask = ~Avx2.MoveMask(vLess.AsByte());
                if (mask != 0)
                {
                    // An int is 4 bytes, so each lane contributes 4 identical bits.
                    // Dividing the trailing zero count by 4 maps the byte position
                    // back to the integer index within the vector.
                    return i + (BitOperations.TrailingZeroCount(mask) / 4);
                }
            }
        }

        return LinearScan(keys.Slice(i), target) + i;
    }

    /// <summary>
    /// AVX-512 implementation: examines 16 integers per iteration and hands any
    /// remaining elements (fewer than 16) to <see cref="ScanAvx2"/>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static unsafe int ScanAvx512(ReadOnlySpan<int> keys, int target)
    {
        // AVX-512 processes 16 integers (512 bits) per instruction.
        var vTarget = Vector512.Create(target);
        var i = 0;
        var len = keys.Length;

        // Pin once for the whole loop rather than once per iteration.
        fixed (int* ptr = keys)
        {
            for (; i <= len - 16; i += 16)
            {
                var vData = Avx512F.LoadVector512(ptr + i);

                // The Vector512 API provides >= directly; one bit per lane is extracted.
                var bits = Vector512.GreaterThanOrEqual(vData, vTarget).ExtractMostSignificantBits();
                if (bits != 0)
                {
                    return i + BitOperations.TrailingZeroCount(bits);
                }
            }
        }

        // Up to 15 elements remain. The public entry point only routes here when AVX2
        // is supported, so the tail can still take one 8-wide step before going scalar.
        return ScanAvx2(keys.Slice(i), target) + i;
    }
}

View file

@ -23,7 +23,7 @@ public readonly struct StandardStrategy<K> : IKeyStrategy<K>
}
// Tell the B-Tree to skip SIMD prefix routing and locate keys with a binary search
public bool UsesPrefixes => false;
public bool UseBinarySearch => true;
// This will never be called because UsesPrefixes is false,
// but we must satisfy the interface.
[MethodImpl(MethodImplOptions.AggressiveInlining)]