namespace PersistentOrderedMap; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; // For AVX2 using System.Numerics; /// /// Helper for SIMD accelerated prefix scanning. /// public static class PrefixScanner { [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int FindFirstGreaterOrEqual(ReadOnlySpan prefixes, long targetPrefix) { // Fallback for short arrays or unsupported hardware if (!Avx2.IsSupported || prefixes.Length < 4) return LinearScan(prefixes, targetPrefix); return Avx512F.IsSupported ? ScanAvx512(prefixes, targetPrefix) : ScanAvx2(prefixes, targetPrefix); } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static int LinearScan(ReadOnlySpan prefixes, long target) { for (var i = 0; i < prefixes.Length; i++) if (prefixes[i] >= target) return i; return prefixes.Length; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ScanAvx2(ReadOnlySpan prefixes, long target) { // Create a vector where every element is the target prefix var vTarget = Vector256.Create(target); var i = 0; var len = prefixes.Length; // Process 4 longs at a time (256 bits) for (; i <= len - 4; i += 4) fixed (long* ptr = prefixes) { var vData = Avx2.LoadVector256(ptr + i); // Compare: result is -1 (all 1s) if true, 0 if false // We want Data >= Target. // AVX2 CompareGreaterThan is for signed. Longs should be treated carefully, // but for text prefixes (positive), signed compare is usually sufficient. // Effectively: !(Data < Target) could be safer if signs vary, // but here we assume prefixes are derived from unsigned chars. // Standard AVX2 hack for CompareGreaterOrEqual (Signed): // No native _mm256_cmpge_epi64 in AVX2. // Use CompareGreaterThan(Data, Target - 1) var vResult = Avx2.CompareGreaterThan(vData, Vector256.Create(target - 1)); var mask = Avx2.MoveMask(vResult.AsByte()); if (mask != 0) { // Identify the first set bit corresponding to a 64-bit element // MoveMask returns 32 bits (1 per byte). Each long is 8 bytes. // We check bits 0, 8, 16, 24. if ((mask & 0xFF) != 0) return i + 0; if ((mask & 0xFF00) != 0) return i + 1; if ((mask & 0xFF0000) != 0) return i + 2; return i + 3; } } return LinearScan(prefixes.Slice(i), target) + i; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private static unsafe int ScanAvx512(ReadOnlySpan prefixes, long target) { var vTarget = Vector512.Create(target); var i = 0; var len = prefixes.Length; for (; i <= len - 8; i += 8) fixed (long* ptr = prefixes) { var vData = Avx512F.LoadVector512(ptr + i); // AVX512 has dedicated Compare Greater Than or Equal Long var mask = Avx512F.CompareGreaterThanOrEqual(vData, vTarget); if (mask != Vector512.Zero) { // Extract most significant bit mask var m = mask.ExtractMostSignificantBits(); // Count trailing zeros to find the index return i + BitOperations.TrailingZeroCount(m); } } return LinearScan(prefixes.Slice(i), target) + i; } }