162 lines
5.9 KiB
C#
162 lines
5.9 KiB
C#
|
|
using System.Numerics;
|
||
|
|
using System.Runtime.CompilerServices;
|
||
|
|
using System.Runtime.Intrinsics;
|
||
|
|
using System.Runtime.Intrinsics.X86;
|
||
|
|
|
||
|
|
namespace PersistentMap;
|
||
|
|
|
||
|
|
using System;
|
||
|
|
using System.Buffers.Binary;
|
||
|
|
using System.Runtime.CompilerServices;
|
||
|
|
|
||
|
|
/// <summary>
/// Describes how keys of type <typeparamref name="K"/> are ordered and how a key is
/// summarised as a 64-bit prefix.
/// </summary>
/// <typeparam name="K">The key type handled by the strategy.</typeparam>
public interface IKeyStrategy<K>
{
    /// <summary>Compares two keys.</summary>
    /// <param name="x">The first key.</param>
    /// <param name="y">The second key.</param>
    /// <returns>
    /// A negative value when <paramref name="x"/> sorts before <paramref name="y"/>,
    /// zero when they are equal, and a positive value otherwise.
    /// </returns>
    int Compare(K x, K y);

    /// <summary>Produces the 64-bit prefix for <paramref name="key"/>.</summary>
    /// <param name="key">The key to summarise.</param>
    /// <returns>The signed 64-bit prefix value.</returns>
    long GetPrefix(K key);
}
|
||
|
|
|
||
|
|
|
||
|
|
/// <summary>
/// Key strategy for <see cref="string"/> keys ordered by ordinal (UTF-16 code unit)
/// comparison.
/// </summary>
public struct UnicodeStrategy : IKeyStrategy<string>
{
    /// <summary>Compares two strings with <c>string.CompareOrdinal</c>.</summary>
    /// <param name="x">The first string (may be <c>null</c>).</param>
    /// <param name="y">The second string (may be <c>null</c>).</param>
    /// <returns>The result of the ordinal comparison.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int Compare(string? x, string? y) => string.CompareOrdinal(x, y);

    /// <summary>
    /// Packs the leading characters of <paramref name="key"/> into a signed 64-bit value
    /// whose signed ordering never contradicts <see cref="Compare"/>: if
    /// <c>Compare(a, b) &lt;= 0</c> then <c>GetPrefix(a) &lt;= GetPrefix(b)</c>.
    /// </summary>
    /// <remarks>
    /// Each UTF-16 code unit is encoded on its own as a 1-3 byte UTF-8-style sequence
    /// (surrogates are encoded individually rather than combined). That byte stream sorts
    /// exactly like the code units do, which is the order <c>string.CompareOrdinal</c>
    /// uses. The first 8 bytes of the stream are kept - a multi-byte sequence may be cut
    /// in the middle - and missing bytes are zero.
    /// </remarks>
    /// <param name="key">The key to summarise.</param>
    /// <returns>
    /// <see cref="long.MinValue"/> for a <c>null</c> or empty key; otherwise the packed
    /// prefix.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public long GetPrefix(string key)
    {
        if (string.IsNullOrEmpty(key))
        {
            return long.MinValue;
        }

        ulong packed = 0;
        int shift = 64; // number of low bits of 'packed' that are still unused

        for (int i = 0; i < key.Length && shift > 0; i++)
        {
            uint unit = key[i];

            if (unit < 0x80u)
            {
                AppendByte(ref packed, ref shift, unit);
            }
            else if (unit < 0x800u)
            {
                AppendByte(ref packed, ref shift, 0xC0u | (unit >> 6));
                AppendByte(ref packed, ref shift, 0x80u | (unit & 0x3Fu));
            }
            else
            {
                AppendByte(ref packed, ref shift, 0xE0u | (unit >> 12));
                AppendByte(ref packed, ref shift, 0x80u | ((unit >> 6) & 0x3Fu));
                AppendByte(ref packed, ref shift, 0x80u | (unit & 0x3Fu));
            }
        }

        // Flip only the sign bit: this maps unsigned big-endian byte order onto signed
        // long order. Flipping the top bit of every byte would invert the relative order
        // of ASCII and non-ASCII bytes in positions 2-8.
        return unchecked((long)(packed ^ 0x8000_0000_0000_0000UL));
    }

    /// <summary>
    /// Places <paramref name="value"/> in the next free byte below the bytes already
    /// stored in <paramref name="packed"/>; does nothing once all 8 bytes are used.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static void AppendByte(ref ulong packed, ref int shift, uint value)
    {
        if (shift > 0)
        {
            shift -= 8;
            packed |= (ulong)value << shift;
        }
    }
}
|
||
|
|
|
||
|
|
/// <summary>
/// Key strategy for <see cref="int"/> keys using their natural numeric order.
/// </summary>
public struct IntStrategy : IKeyStrategy<int>
{
    /// <summary>Compares two integers numerically.</summary>
    /// <param name="x">The first value.</param>
    /// <param name="y">The second value.</param>
    /// <returns>-1 when <paramref name="x"/> is smaller, 1 when it is larger, 0 when equal.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int Compare(int x, int y)
    {
        if (x < y)
        {
            return -1;
        }

        return x > y ? 1 : 0;
    }

    /// <summary>
    /// Places <paramref name="key"/> in the upper 32 bits of a long, leaving the lower
    /// 32 bits zero.
    /// </summary>
    /// <param name="key">The key to summarise.</param>
    /// <returns>The sign-extended key shifted left by 32 bits.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public long GetPrefix(int key)
    {
        // Widen with sign extension first so negative keys stay negative after the
        // shift; the signed order of the results then matches the order of the keys.
        long widened = key;
        return widened << 32;
    }
}
|
||
|
|
/// <summary>
/// Helper for SIMD accelerated prefix scanning over signed 64-bit prefixes.
/// </summary>
public static class PrefixScanner
{
    /// <summary>
    /// Finds the first element of <paramref name="prefixes"/> that is greater than or
    /// equal to <paramref name="targetPrefix"/> using signed comparison.
    /// </summary>
    /// <param name="prefixes">The prefixes to scan, front to back.</param>
    /// <param name="targetPrefix">The value to compare every element against.</param>
    /// <returns>
    /// The zero-based index of the first qualifying element, or
    /// <c>prefixes.Length</c> when no element qualifies (including an empty span).
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FindFirstGreaterOrEqual(ReadOnlySpan<long> prefixes, long targetPrefix)
    {
        // Scalar fallback for unsupported hardware or inputs shorter than one AVX2 vector.
        if (!Avx2.IsSupported || prefixes.Length < Vector256<long>.Count)
        {
            return LinearScan(prefixes, targetPrefix);
        }

        return Avx512F.IsSupported
            ? ScanAvx512(prefixes, targetPrefix)
            : ScanAvx2(prefixes, targetPrefix);
    }

    /// <summary>Scalar scan; returns <c>prefixes.Length</c> when nothing matches.</summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int LinearScan(ReadOnlySpan<long> prefixes, long target)
    {
        for (var i = 0; i < prefixes.Length; i++)
        {
            if (prefixes[i] >= target)
            {
                return i;
            }
        }

        return prefixes.Length;
    }

    /// <summary>
    /// Scans four longs per step with AVX2 and finishes the remainder with
    /// <see cref="LinearScan"/>. Must only be called when <c>Avx2.IsSupported</c>.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ScanAvx2(ReadOnlySpan<long> prefixes, long target)
    {
        var lanes = Vector256<long>.Count;
        var allLanes = (1u << lanes) - 1;
        var vTarget = Vector256.Create(target);
        var i = 0;

        for (; i <= prefixes.Length - lanes; i += lanes)
        {
            var vData = Vector256.Create(prefixes.Slice(i, lanes));

            // AVX2 only offers a signed "greater than" for 64-bit lanes. Compute
            // (target > data) and invert it to get (data >= target). The alternative
            // (data > target - 1) overflows when target == long.MinValue.
            var below = Avx2.CompareGreaterThan(vTarget, vData).ExtractMostSignificantBits();
            var atOrAbove = ~below & allLanes;

            if (atOrAbove != 0)
            {
                // One bit per 64-bit lane; the lowest set bit is the first match.
                return i + BitOperations.TrailingZeroCount(atOrAbove);
            }
        }

        return LinearScan(prefixes.Slice(i), target) + i;
    }

    /// <summary>
    /// Scans eight longs per step with AVX-512F and hands the remainder (fewer than
    /// eight elements) to <see cref="ScanAvx2"/>. Must only be called when both
    /// <c>Avx512F.IsSupported</c> and <c>Avx2.IsSupported</c> are true.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int ScanAvx512(ReadOnlySpan<long> prefixes, long target)
    {
        var lanes = Vector512<long>.Count;
        var vTarget = Vector512.Create(target);
        var i = 0;

        for (; i <= prefixes.Length - lanes; i += lanes)
        {
            var vData = Vector512.Create(prefixes.Slice(i, lanes));

            // AVX-512 has a native signed greater-than-or-equal for 64-bit lanes.
            var hits = Avx512F.CompareGreaterThanOrEqual(vData, vTarget).ExtractMostSignificantBits();

            if (hits != 0)
            {
                return i + BitOperations.TrailingZeroCount(hits);
            }
        }

        return ScanAvx2(prefixes.Slice(i), target) + i;
    }
}
|