Skip to content

Optimize ImmutableHashSet<T>.SetEquals to avoid unnecessary allocations#126309

Open
aw0lid wants to merge 1 commit intodotnet:mainfrom
aw0lid:fix-immutablehashset-setequals-allocs
Open

Optimize ImmutableHashSet<T>.SetEquals to avoid unnecessary allocations#126309
aw0lid wants to merge 1 commit intodotnet:mainfrom
aw0lid:fix-immutablehashset-setequals-allocs

Conversation

@aw0lid
Copy link
Copy Markdown

@aw0lid aw0lid commented Mar 30, 2026

Fixes #90986

Summary

ImmutableHashSet<T>.SetEquals always creates a new intermediate HashSet<T> for the other collection, leading to avoidable allocations and GC pressure, especially for large datasets.

Optimization Logic

  • O(1) Pre-Scan: Immediately returns false if other is an ICollection with a smaller Count, avoiding any overhead.
  • Fast-Path Pattern Matching: Detects ImmutableHashSet<T> and HashSet<T> to bypass intermediate allocations.
  • Comparer Guard: Validates EqualityComparer compatibility before triggering fast paths to ensure logical consistency.
  • Short-Circuit Validation: Re-validates Count within specialized paths for an immediate exit before O(n) enumeration.
  • Zero-Allocation Execution: Direct iteration over compatible collections, eliminating the costly new HashSet<T>(other) fallback.
  • Deferred fallback: Reserves the expensive allocation solely for general IEnumerable types.
Click to expand Benchmark Source Code
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;

namespace ImmutableHashSetBenchmarks
{
    /// <summary>
    /// BenchmarkDotNet suite that calls <c>SetEquals</c> on an <see cref="ImmutableHashSet{T}"/> of
    /// <see cref="Size"/> integers, passing arguments of different collection types, sizes and comparers.
    /// Every benchmark returns the <c>SetEquals</c> result so the call is not discarded.
    /// </summary>
    [MemoryDiagnoser]
    [Orderer(SummaryOrderPolicy.FastestToSlowest)]
    [RankColumn]
    public class ImmutableHashSetSetEqualsBenchmark_Int
    {
        // The set every benchmark calls SetEquals on: the integers 0 .. Size-1.
        private ImmutableHashSet<int> _sourceSet = null!;

        // Same elements as the source set, in different container types.
        private ImmutableHashSet<int> _immutableHashSetEqual = null!;
        private HashSet<int> _bclHashSetEqual = null!;
        private List<int> _listEqual = null!;
        private int[] _arrayEqual = null!;
        private IEnumerable<int> _linqSelectEqual = null!;

        // Same count as the source set, but the last element differs.
        private ImmutableHashSet<int> _immutableHashSetLastDiff = null!;
        private HashSet<int> _bclHashSetLastDiff = null!;
        private List<int> _listLastDiff = null!;
        private IEnumerable<int> _lazyEnumerableLastDiff = null!;

        // Fewer elements than the source set (Size / 2 for the sets, Size - 1 for the list/array).
        private ImmutableHashSet<int> _immutableHashSetSmaller = null!;
        private HashSet<int> _bclHashSetSmaller = null!;
        private List<int> _listSmaller = null!;
        private int[] _smallerArray = null!;

        // One element more than the source set.
        private ImmutableHashSet<int> _immutableLarger = null!;

        // HashSets built with a ReverseComparer<int> instead of the default comparer.
        private HashSet<int> _bclHashSetDiffComparer = null!;
        private HashSet<int> _smallerHashSetDiffComparer = null!;

        // Lists containing repeated values.
        private List<int> _listWithDuplicates = null!;
        private List<int> _listWithDuplicatesMatch = null!;

        /// <summary>Number of elements in the source set.</summary>
        [Params(100000)]
        public int Size { get; set; }

        /// <summary>Builds the source set and every argument collection once per parameter combination.</summary>
        [GlobalSetup]
        public void Setup()
        {
            var elements = Enumerable.Range(0, Size).ToList();
            var elementsWithLastDiff = Enumerable.Range(0, Size - 1).Concat(new[] { Size + 1000 }).ToList();
            var smallerElements = Enumerable.Range(0, Size / 2).ToList();
            var duplicates = Enumerable.Repeat(1, Size).ToList();

            _sourceSet = ImmutableHashSet.CreateRange(elements);

            _immutableHashSetEqual = ImmutableHashSet.CreateRange(elements);
            _bclHashSetEqual = new HashSet<int>(elements);
            _listEqual = elements;
            _arrayEqual = elements.ToArray();
            _linqSelectEqual = elements.Select(x => x); // Lazy LINQ enumerable

            _immutableHashSetLastDiff = ImmutableHashSet.CreateRange(elementsWithLastDiff);
            _bclHashSetLastDiff = new HashSet<int>(elementsWithLastDiff);
            _listLastDiff = elementsWithLastDiff;
            _lazyEnumerableLastDiff = elementsWithLastDiff.Select(x => x);

            _immutableHashSetSmaller = ImmutableHashSet.CreateRange(smallerElements);
            _bclHashSetSmaller = new HashSet<int>(smallerElements);
            _listSmaller = Enumerable.Range(0, Size - 1).ToList();
            _smallerArray = Enumerable.Range(0, Size - 1).ToArray();

            _immutableLarger = ImmutableHashSet.CreateRange(elements.Concat(new[] { -1 }));

            _bclHashSetDiffComparer = new HashSet<int>(elements, new ReverseComparer<int>());
            _smallerHashSetDiffComparer = new HashSet<int>(_listSmaller, new ReverseComparer<int>());

            _listWithDuplicates = duplicates;
            _listWithDuplicatesMatch = elements.Concat(elements).ToList(); // Matches source but with duplicates
        }

        #region Fast Path: Same Type and Comparer (Optimized)

        [Benchmark(Description = "ImmutableHashSet (Match - Same Comparer)")]
        public bool Case_ImmutableHashSet_Match() => _sourceSet.SetEquals(_immutableHashSetEqual);

        [Benchmark(Description = "BCL HashSet (Match - Same Comparer)")]
        public bool Case_BclHashSet_Match() => _sourceSet.SetEquals(_bclHashSetEqual);

        [Benchmark(Description = "ImmutableHashSet (Mismatch - Same Count)")]
        public bool Case_ImmutableHashSet_LastDiff() => _sourceSet.SetEquals(_immutableHashSetLastDiff);

        [Benchmark(Description = "BCL HashSet (Mismatch - Same Count)")]
        public bool Case_BclHashSet_LastDiff() => _sourceSet.SetEquals(_bclHashSetLastDiff);

        #endregion

        #region Early Exit: Count Mismatch

        [Benchmark(Description = "ImmutableHashSet (Smaller Count)")]
        public bool Case_ImmutableHashSet_SmallerCount() => _sourceSet.SetEquals(_immutableHashSetSmaller);

        [Benchmark(Description = "ImmutableHashSet (Larger Count)")]
        public bool Case_LargerCount() => _sourceSet.SetEquals(_immutableLarger);

        [Benchmark(Description = "BCL HashSet (Smaller Count)")]
        public bool Case_BclHashSet_SmallerCount() => _sourceSet.SetEquals(_bclHashSetSmaller);

        [Benchmark(Description = "Array (Smaller Count)")]
        public bool Case_SmallerCollection_EarlyExit() => _sourceSet.SetEquals(_smallerArray);

        #endregion

        #region Fallback Path: Different Comparer

        [Benchmark(Description = "HashSet (Different Comparer)")]
        public bool Case_HashSet_DifferentComparer() => _sourceSet.SetEquals(_bclHashSetDiffComparer);

        [Benchmark(Description = "HashSet (Smaller Count - Different Comparer)")]
        public bool Case_HashSet_SmallerCount_DiffComparer() => _sourceSet.SetEquals(_smallerHashSetDiffComparer);

        #endregion

        #region Fallback Path: Non-Set Collections (IEnumerable/ICollection)

        [Benchmark(Description = "List (Match - Fallback)")]
        public bool Case_List_Match() => _sourceSet.SetEquals(_listEqual);

        [Benchmark(Description = "List (Last Diff - Fallback)")]
        public bool Case_List_LastDiff() => _sourceSet.SetEquals(_listLastDiff);

        [Benchmark(Description = "Array (Match - Fallback)")]
        public bool Case_Array_Match() => _sourceSet.SetEquals(_arrayEqual);

        [Benchmark(Description = "LINQ (Match - Lazy IEnumerable)")]
        public bool Case_LazyEnumerable_Match() => _sourceSet.SetEquals(_linqSelectEqual);

        [Benchmark(Description = "LINQ (Mismatch - Lazy IEnumerable)")]
        public bool Case_LazyEnumerable_LastDiff() => _sourceSet.SetEquals(_lazyEnumerableLastDiff);

        #endregion

        #region Handling Duplicates (Fallback Path)

        [Benchmark(Description = "List with Duplicates (Mismatch)")]
        public bool Case_List_Duplicates_Mismatch() => _sourceSet.SetEquals(_listWithDuplicates);

        [Benchmark(Description = "List with Duplicates (Match)")]
        public bool Case_List_Duplicates_Match() => _sourceSet.SetEquals(_listWithDuplicatesMatch);

        #endregion
    }

    /// <summary>
    /// Equality comparer that treats two values as equal when <see cref="IComparable{T}.CompareTo"/>
    /// reports zero, and hashes with the value's own <see cref="object.GetHashCode"/>.
    /// Despite the name, no ordering is reversed; the benchmark setup passes it to
    /// <see cref="HashSet{T}"/> constructors in place of the default comparer.
    /// </summary>
    /// <typeparam name="T">Element type; must be comparable to itself.</typeparam>
    public class ReverseComparer<T> : IEqualityComparer<T> where T : IComparable<T>
    {
        /// <summary>Returns true when both values are null, or both are non-null and compare as equal.</summary>
        public bool Equals(T? x, T? y)
        {
            if (x is null)
            {
                // A null left side only equals a null right side.
                return y is null;
            }

            if (y is null)
            {
                return false;
            }

            return x.CompareTo(y) == 0;
        }

        /// <summary>Returns 0 for null, otherwise the value's own hash code.</summary>
        public int GetHashCode(T? obj)
        {
            if (obj is null)
            {
                return 0;
            }

            return obj.GetHashCode();
        }
    }

    /// <summary>Console entry point for the benchmark project.</summary>
    public class Program
    {
        /// <summary>Runs the <see cref="ImmutableHashSetSetEqualsBenchmark_Int"/> suite.</summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args) =>
            BenchmarkRunner.Run<ImmutableHashSetSetEqualsBenchmark_Int>();
    }
}
Click to expand Benchmark Results

Benchmark Results (Before Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'BCL HashSet (Smaller Count)' 100000 313.8 us 6.01 us 6.43 us 1 15.6250 15.6250 15.6250 818.33 KB
'Array (Smaller Count)' 100000 647.9 us 11.20 us 11.50 us 2 26.3672 26.3672 26.3672 1697.7 KB
'List with Duplicates (Mismatch)' 100000 954.1 us 18.77 us 41.60 us 3 31.2500 31.2500 31.2500 1697.77 KB
' HashSet (Smaller Count - Different Comparer)' 100000 1,449.3 us 28.65 us 74.46 us 4 41.0156 41.0156 41.0156 1697.8 KB
' ImmutableHashSet (Smaller Count)' 100000 4,733.2 us 74.18 us 69.39 us 5 23.4375 23.4375 23.4375 818.58 KB
' BCL HashSet (Match - Same Comparer)' 100000 7,084.0 us 65.02 us 57.64 us 6 54.6875 54.6875 54.6875 1697.9 KB
'Array (Match - Fallback)' 100000 7,821.7 us 30.71 us 27.23 us 7 46.8750 46.8750 46.8750 1697.86 KB
'List (Match - Fallback)' 100000 8,428.4 us 30.82 us 28.83 us 8 46.8750 46.8750 46.8750 1697.9 KB
'BCL HashSet (Mismatch - Same Count)' 100000 8,636.3 us 52.37 us 46.42 us 8 46.8750 46.8750 46.8750 1697.86 KB
'List (Last Diff - Fallback)' 100000 9,172.5 us 35.85 us 33.54 us 9 46.8750 46.8750 46.8750 1697.9 KB
'List with Duplicates (Match)' 100000 9,310.2 us 128.11 us 119.83 us 9 109.3750 109.3750 109.3750 3521.42 KB
' ImmutableHashSet (Larger Count)' 100000 9,477.3 us 141.55 us 125.48 us 9 46.8750 46.8750 46.8750 1697.89 KB
' HashSet (Different Comparer)' 100000 9,839.2 us 99.14 us 87.88 us 9 46.8750 46.8750 46.8750 1697.79 KB
'LINQ (Mismatch - Lazy IEnumerable)' 100000 11,274.4 us 63.77 us 56.53 us 10 296.8750 156.2500 156.2500 4717.23 KB
'LINQ (Match - Lazy IEnumerable)' 100000 11,341.5 us 69.37 us 61.49 us 10 296.8750 156.2500 156.2500 4717.23 KB
'ImmutableHashSet (Mismatch - Same Count)' 100000 17,015.5 us 170.03 us 150.73 us 11 31.2500 31.2500 31.2500 1697.88 KB
'ImmutableHashSet (Match - Same Comparer)' 100000 17,410.2 us 334.48 us 312.87 us 11 31.2500 31.2500 31.2500 1697.87 KB

Benchmark Results (After Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'BCL HashSet (Smaller Count)' 100000 4.440 ns 0.0722 ns 0.0640 ns 2 - - - -
'Array (Smaller Count)' 100000 4.358 ns 0.1327 ns 0.1725 ns 2 - - - -
'List with Duplicates (Mismatch)' 100000 819,776.626 ns 20,883.6926 ns 59,243.5643 ns 3 31.2500 31.2500 31.2500 1738507 B
'HashSet (Smaller Count - Different Comparer)' 100000 4.303 ns 0.0391 ns 0.0597 ns 2 - - - -
'ImmutableHashSet (Smaller Count)' 100000 3.067 ns 0.0498 ns 0.0489 ns 1 - - - -
'BCL HashSet (Match - Same Comparer)' 100000 6,114,276.638 ns 37,962.0529 ns 35,509.7278 ns 4 - - - -
'Array (Match - Fallback)' 100000 7,850,947.512 ns 69,284.0286 ns 61,418.4867 ns 5 46.8750 46.8750 46.8750 1738644 B
'List (Match - Fallback)' 100000 7,835,232.854 ns 56,686.4440 ns 53,024.5349 ns 5 46.8750 46.8750 46.8750 1738772 B
'BCL HashSet (Mismatch - Same Count)' 100000 6,246,297.683 ns 28,808.5674 ns 26,947.5519 ns 4 - - - -
'List (Last Diff - Fallback)' 100000 7,957,405.824 ns 81,299.4562 ns 76,047.5618 ns 5 46.8750 46.8750 46.8750 1738647 B
'List with Duplicates (Match)' 100000 8,607,130.755 ns 168,711.8478 ns 173,254.6464 ns 6 93.7500 93.7500 93.7500 3605853 B
'ImmutableHashSet (Larger Count)' 100000 3.057 ns 0.0622 ns 0.1152 ns 1 - - - -
'HashSet (Different Comparer)' 100000 8,399,771.971 ns 81,314.3501 ns 72,083.0534 ns 6 46.8750 46.8750 46.8750 1738651 B
'LINQ (Mismatch - Lazy IEnumerable)' 100000 9,629,772.579 ns 180,985.4226 ns 208,423.0819 ns 7 296.8750 156.2500 156.2500 4830564 B
'LINQ (Match - Lazy IEnumerable)' 100000 9,422,079.159 ns 181,499.2273 ns 186,386.3438 ns 7 296.8750 156.2500 156.2500 4830569 B
'ImmutableHashSet (Mismatch - Same Count)' 100000 13,463,793.002 ns 91,391.4784 ns 81,016.1652 ns 8 - - - -
'ImmutableHashSet (Match - Same Comparer)' 100000 13,041,742.770 ns 108,600.6130 ns 96,271.6147 ns 8 - - - -

Performance Analysis Summary (100,000 Elements)

Case Before (ns) After (ns) Speedup Ratio Memory Improvement
ImmutableHashSet (Larger) 9,477,300 3.057 3,100,196x -100% (Zero Alloc)
ImmutableHashSet (Smaller) 4,733,200 3.067 1,543,267x -100% (Zero Alloc)
HashSet (Smaller - Diff) 1,449,300 4.303 336,811x -100% (Zero Alloc)
Array (Smaller) 647,900 4.358 148,669x -100% (Zero Alloc)
BCL HashSet (Smaller) 313,800 4.440 70,675x -100% (Zero Alloc)
BCL HashSet (Mismatch) 8,636,300 6,246,297 1.38x -100% (Zero Alloc)
ImmutableHashSet (Match) 17,410,200 13,041,742 1.33x -100% (Zero Alloc)
ImmutableHashSet (Mismatch) 17,015,500 13,463,793 1.26x -100% (Zero Alloc)
LINQ (Match) 11,341,500 9,422,079 1.20x Stable
BCL HashSet (Match) 7,084,000 6,114,276 1.16x -100% (Zero Alloc)
LINQ (Mismatch) 11,274,400 9,629,772 1.17x Stable
List (Duplicates - Mismatch) 954,100 819,776 1.16x Stable
List (Last Diff) 9,172,500 7,957,405 1.15x Stable
HashSet (Diff Comparer) 9,839,200 8,399,771 1.17x Stable
List (Match - Fallback) 8,428,400 7,835,232 1.07x Stable
List (Duplicates - Match) 9,310,200 8,607,130 1.08x Stable
Array (Match - Fallback) 7,821,700 7,850,947 ~1.00x Stable

@dotnet-policy-service dotnet-policy-service bot added the community-contribution Indicates that the PR has been added by a community member label Mar 30, 2026
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 9910d86 to ff6af74 Compare April 3, 2026 14:44
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from ff6af74 to 5f2749e Compare April 3, 2026 19:27
@aw0lid aw0lid requested a review from stephentoub April 4, 2026 11:46
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch 3 times, most recently from 3c685c8 to 45c2c14 Compare April 8, 2026 23:05
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 45c2c14 to 6a3ebf6 Compare April 12, 2026 20:53
@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch 2 times, most recently from 13cc045 to 1ab929a Compare April 12, 2026 21:23
@aw0lid
Copy link
Copy Markdown
Author

aw0lid commented Apr 13, 2026

Gentle ping in case this fell through the cracks.
Happy to address any feedback or make adjustments if needed.

@aw0lid aw0lid force-pushed the fix-immutablehashset-setequals-allocs branch from 1ab929a to 6a2294d Compare April 13, 2026 18:43
@tannergooding
Copy link
Copy Markdown
Member

@dotnet/area-system-collections for secondary review

{
return false;
}
foreach (T item in otherAsHashSet)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ImmutableHashSet uses an AVL tree which makes lookup O(log n), so this loop is O(n log n). However, lookup in a HashSet is O(1), so if you flip the enumeration to be on the ImmutableHashSet instead of the HashSet, then this loop becomes O(n).

Copy link
Copy Markdown
Author

@aw0lid aw0lid Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch! It's a very straightforward optimization that we missed. However, while implementing it, I found a performance regression (about 3-4ns) in the fast-path scenarios. I believe this is because the increased method complexity prevents the JIT from inlining it.

I've experimented with two approaches. The first was keeping it as a single method:

/// <summary>
/// Single-method variant: reports whether <paramref name="other"/> holds the same elements as the
/// set described by <paramref name="origin"/>, using allocation-free paths where possible.
/// </summary>
/// <param name="other">The sequence to compare; checked with <c>Requires.NotNull</c>.</param>
/// <param name="origin">Supplies the root, count and equality comparer of the set being compared.</param>
/// <returns><c>true</c> when every check below passes; otherwise <c>false</c>.</returns>
private static bool SetEquals(IEnumerable<T> other, MutationInput origin)
{
    Requires.NotNull(other, nameof(other));

    if (other is ICollection<T> sizedOther)
    {
        // Fewer items than this set has: equality is impossible.
        if (sizedOther.Count < origin.Count)
        {
            return false;
        }

        // HashSet<T> sharing this set's comparer instance: probe it directly, no copy needed.
        if (other is HashSet<T> mutableSet && mutableSet.Comparer == origin.EqualityComparer)
        {
            if (mutableSet.Count != origin.Count)
            {
                return false;
            }

            for (var cursor = new ImmutableHashSet<T>.Enumerator(origin.Root); cursor.MoveNext();)
            {
                if (!mutableSet.Contains(cursor.Current))
                {
                    return false;
                }
            }

            return true;
        }

        // ImmutableHashSet<T> sharing this set's comparer instance: walk it and probe the origin.
        if (other is ImmutableHashSet<T> immutableSet && immutableSet.KeyComparer == origin.EqualityComparer)
        {
            if (immutableSet.Count != origin.Count)
            {
                return false;
            }

            foreach (T candidate in immutableSet)
            {
                if (!Contains(candidate, origin))
                {
                    return false;
                }
            }

            return true;
        }
    }
    else if (other is ICollection untypedOther && untypedOther.Count < origin.Count)
    {
        return false;
    }

    // General case: copy into a HashSet<T> under the origin's comparer, then compare.
    var materialized = new HashSet<T>(other, origin.EqualityComparer);
    if (materialized.Count != origin.Count)
    {
        return false;
    }

    for (var cursor = new ImmutableHashSet<T>.Enumerator(origin.Root); cursor.MoveNext();)
    {
        if (!materialized.Contains(cursor.Current))
        {
            return false;
        }
    }

    return true;
}

The second was splitting the logic:

/// <summary>
/// Split variant, public entry point: reports whether this set and <paramref name="other"/>
/// contain the same elements.
/// </summary>
/// <param name="other">The sequence to compare; checked with <c>Requires.NotNull</c>.</param>
/// <returns><c>true</c> when the sets are considered equal; otherwise <c>false</c>.</returns>
public bool SetEquals(IEnumerable<T> other)
{
    Requires.NotNull(other, nameof(other));

    // Comparing the instance with itself needs no further work.
    if (object.ReferenceEquals(this, other))
    {
        return true;
    }

    switch (other)
    {
        case ICollection<T> typedCollection:
            // Generic collections go through the helper that holds the count and type checks.
            return SetEqualsFastPath(typedCollection, this.Origin);

        case ICollection untypedCollection when untypedCollection.Count < this.Count:
            // Fewer items than this set has: equality is impossible.
            return false;

        default:
            return SetEquals(other, this.Origin);
    }
}

/// <summary>
/// Split variant, helper for generic collections: rejects on count, compares without copying when
/// <paramref name="other"/> is a <see cref="HashSet{T}"/> or <see cref="ImmutableHashSet{T}"/> whose
/// comparer is the same instance as the origin's, and otherwise defers to the
/// <c>SetEquals(IEnumerable&lt;T&gt;, MutationInput)</c> overload.
/// </summary>
/// <param name="other">The collection to compare.</param>
/// <param name="origin">Supplies the root, count and equality comparer of the set being compared.</param>
/// <returns><c>true</c> when every check below passes; otherwise <c>false</c>.</returns>
private static bool SetEqualsFastPath(ICollection<T> other, MutationInput origin)
{
    // Fewer items than the origin has: equality is impossible.
    if (other.Count < origin.Count)
    {
        return false;
    }

    if (other is HashSet<T> mutableSet && mutableSet.Comparer == origin.EqualityComparer)
    {
        if (mutableSet.Count != origin.Count)
        {
            return false;
        }

        // Walk the origin and probe the HashSet<T>.
        for (var cursor = new ImmutableHashSet<T>.Enumerator(origin.Root); cursor.MoveNext();)
        {
            if (!mutableSet.Contains(cursor.Current))
            {
                return false;
            }
        }

        return true;
    }

    if (other is ImmutableHashSet<T> immutableSet && immutableSet.KeyComparer == origin.EqualityComparer)
    {
        if (immutableSet.Count != origin.Count)
        {
            return false;
        }

        foreach (T candidate in immutableSet)
        {
            if (!Contains(candidate, origin))
            {
                return false;
            }
        }

        return true;
    }

    // Any other collection, or a set with a different comparer instance.
    return SetEquals(other, origin);
}

/// <summary>
/// Split variant, general path: copies <paramref name="other"/> into a <see cref="HashSet{T}"/> built
/// with the origin's equality comparer, then checks that the counts match and that every element
/// of the origin is present in the copy.
/// </summary>
/// <param name="other">The sequence to compare; checked with <c>Requires.NotNull</c>.</param>
/// <param name="origin">Supplies the root, count and equality comparer of the set being compared.</param>
/// <returns><c>true</c> when the counts match and no origin element is missing; otherwise <c>false</c>.</returns>
private static bool SetEquals(IEnumerable<T> other, MutationInput origin)
{
    Requires.NotNull(other, nameof(other));

    var materialized = new HashSet<T>(other, origin.EqualityComparer);
    if (materialized.Count != origin.Count)
    {
        return false;
    }

    for (var cursor = new ImmutableHashSet<T>.Enumerator(origin.Root); cursor.MoveNext();)
    {
        if (!materialized.Contains(cursor.Current))
        {
            return false;
        }
    }

    return true;
}
Method Scenario Before Splitting (No Inlining) After Splitting (FastPath) Regression / Overhead Improvement %
BCL HashSet (Smaller Count) 10.317 ns 3.066 ns 7.251 ns 70.3%
HashSet (Diff Comparer - Small) 10.340 ns 3.039 ns 7.301 ns 70.6%
ImmutableHashSet (Larger Count) 7.674 ns 3.951 ns 3.723 ns 48.5%
ImmutableHashSet (Smaller Count) 7.841 ns 4.689 ns 3.152 ns 40.2%
Array (Smaller Count) 10.653 ns 8.042 ns 2.611 ns 24.5%

If my assumption is correct, do you think splitting is the best approach here, or is there a way to simplify the method so the JIT can inline it while unified?
cc/ @tannergooding

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

area-System.Collections community-contribution Indicates that the PR has been added by a community member

Projects

None yet

Development

Successfully merging this pull request may close these issues.

ImmutableHashSet<T>.SetEquals always creates a new HashSet<T>

4 participants