Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve MurmurHash string hash memory footprint #5028

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/contrib/cluster/Akka.DistributedData/ORSet.cs
Expand Up @@ -709,9 +709,9 @@ private ORSet<T> MergeRemoveDelta(RemoveDeltaOperation delta)
{
while (deleteDots.MoveNext())
{
var curr = deleteDots.Current;
deleteDotNodes.Add(curr.Key);
deleteDotsAreGreater &= (thisDot != null && (thisDot.VersionAt(curr.Key) <= curr.Value));
var (key, value) = deleteDots.Current;
deleteDotNodes.Add(key);
deleteDotsAreGreater &= (thisDot != null && (thisDot.VersionAt(key) <= value));
}
}

Expand All @@ -720,7 +720,7 @@ private ORSet<T> MergeRemoveDelta(RemoveDeltaOperation delta)
{
if (thisDot != null)
{
using (var e = thisDot.VersionEnumerator)
using (IEnumerator<(UniqueAddress Key, long)> e = thisDot.VersionEnumerator)
{
var allContains = true;
while (e.MoveNext()) allContains &= deleteDotNodes.Contains(e.Current.Key);
Expand Down
Expand Up @@ -153,11 +153,11 @@ public static Proto.Msg.VersionVector VersionVectorToProto(VersionVector version
{
while (enumerator.MoveNext())
{
var current = enumerator.Current;
var (key, value) = enumerator.Current;
b.Entries.Add(new Proto.Msg.VersionVector.Types.Entry()
{
Node = UniqueAddressToProto(current.Key),
Version = current.Value
Node = UniqueAddressToProto(key),
Version = value
});
}
}
Expand Down
65 changes: 54 additions & 11 deletions src/contrib/cluster/Akka.DistributedData/VersionVector.cs
Expand Up @@ -52,13 +52,13 @@ public static VersionVector Create(ImmutableDictionary<UniqueAddress, long> vers
/// <summary>
/// Marker to signal that we have reached the end of a version vector.
/// </summary>
private static readonly KeyValuePair<UniqueAddress, long> EndMarker = new KeyValuePair<UniqueAddress, long>(null, long.MinValue);
private static readonly (UniqueAddress, long) EndMarker = (null, long.MinValue);

public abstract bool IsEmpty { get; }

public abstract int Count { get; }

public abstract IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator { get; }
public abstract IEnumerator<(UniqueAddress, long)> VersionEnumerator { get; }
public static readonly VersionVector Empty = new MultiVersionVector(ImmutableDictionary<UniqueAddress, long>.Empty);

/// <summary>
Expand Down Expand Up @@ -163,11 +163,11 @@ private Ordering CompareOnlyTo(VersionVector other, Ordering order)
private T NextOrElse<T>(IEnumerator<T> enumerator, T defaultValue) =>
enumerator.MoveNext() ? enumerator.Current : defaultValue;

private Ordering Compare(IEnumerator<KeyValuePair<UniqueAddress, long>> i1,
IEnumerator<KeyValuePair<UniqueAddress, long>> i2, Ordering requestedOrder)
private Ordering Compare(IEnumerator<(UniqueAddress, long)> i1,
IEnumerator<(UniqueAddress, long)> i2, Ordering requestedOrder)
{
var nt1 = NextOrElse(i1, EndMarker);
var nt2 = NextOrElse(i2, EndMarker);
var nt1 = NextOrElse<(UniqueAddress Key, long Value)>(i1, EndMarker);
var nt2 = NextOrElse<(UniqueAddress Key, long Value)>(i2, EndMarker);
var currentOrder = Ordering.Same;
while (true)
{
Expand Down Expand Up @@ -214,13 +214,13 @@ private Ordering CompareOnlyTo(VersionVector other, Ordering order)
[DebuggerDisplay("VersionVector({Node}->{Version})")]
public sealed class SingleVersionVector : VersionVector
{
private sealed class Enumerator : IEnumerator<KeyValuePair<UniqueAddress, long>>
private sealed class Enumerator : IEnumerator<(UniqueAddress, long)>
{
private bool _moved = false;

public Enumerator(UniqueAddress node, long version)
{
Current = new KeyValuePair<UniqueAddress, long>(node, version);
Current = (node, version);
}

/// <inheritdoc/>
Expand All @@ -241,7 +241,7 @@ public void Reset()
_moved = false;
}

public KeyValuePair<UniqueAddress, long> Current { get; }
public (UniqueAddress, long) Current { get; }

object IEnumerator.Current => Current;
}
Expand All @@ -257,7 +257,7 @@ public SingleVersionVector(UniqueAddress node, long version)

public override bool IsEmpty => false;
public override int Count => 1;
public override IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator => new Enumerator(Node, Version);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a breaking API change or is this a subtype of an internal type above?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, both of these are breaking changes.

I thought about doing this in #4990 - but I decided to avoid the breaking API change by exposing an internal API that should be used for this.

I don't think the risk of this breaking change having a significant negative impact is that high, as DData is primarily used either directly or inside of cluster sharding, and these APIs aren't called directly... but it's a judgement call.

public override IEnumerator<(UniqueAddress, long)> VersionEnumerator => new Enumerator(Node, Version);
public override VersionVector Increment(UniqueAddress node)
{
var v = Counter.GetAndIncrement();
Expand Down Expand Up @@ -317,6 +317,49 @@ public override int GetHashCode()
[Serializable]
public sealed class MultiVersionVector : VersionVector
{
/// <summary>
/// Enumerates a snapshot of a version dictionary as <c>(UniqueAddress, long)</c> tuples.
/// </summary>
/// <remarks>
/// The entries are copied into a flat array once, in the constructor; enumeration is then
/// a plain index walk over that array and never touches the source dictionary again.
/// </remarks>
internal class Enumerator : IEnumerator<(UniqueAddress, long)>
{
    // Snapshot of the dictionary entries taken at construction time.
    private readonly (UniqueAddress, long)[] _backing;

    // Index of the last valid element; -1 when the snapshot is empty.
    private readonly int _maxIndex;

    // -1 means "positioned before the first element".
    private int _currentIndex = -1;

    /// <summary>
    /// Creates an enumerator over a copy of the entries in <paramref name="versions"/>.
    /// </summary>
    /// <param name="versions">The node-to-version map to snapshot.</param>
    public Enumerator(ImmutableDictionary<UniqueAddress, long> versions)
    {
        _backing = new (UniqueAddress, long)[versions.Count];
        var index = 0;
        foreach (var kvp in versions)
        {
            _backing[index] = (kvp.Key, kvp.Value);
            index++;
        }

        _maxIndex = _backing.Length - 1;
    }

    /// <summary>
    /// Advances to the next entry.
    /// </summary>
    /// <returns>
    /// <c>true</c> if the enumerator moved to a valid entry; <c>false</c> once the end has
    /// been reached. After the end, the position stays on the last entry, so repeated calls
    /// keep returning <c>false</c>.
    /// </returns>
    public bool MoveNext()
    {
        if (_currentIndex < _maxIndex)
        {
            _currentIndex++;
            return true;
        }

        return false;
    }

    /// <summary>
    /// Repositions the enumerator before the first entry.
    /// </summary>
    public void Reset()
    {
        // Must be -1, not 0: MoveNext() increments before the element is read, so starting
        // at 0 would skip the first entry (and make Current index into an empty array when
        // the snapshot has no entries).
        _currentIndex = -1;
    }

    /// <summary>
    /// The entry at the current position, or <c>(null, 0)</c> while the enumerator is
    /// positioned before the first entry (which is always the case for an empty snapshot).
    /// </summary>
    public (UniqueAddress, long) Current
        => _currentIndex == -1 ? (null, 0) : _backing[_currentIndex];

    object IEnumerator.Current => Current;

    /// <summary>
    /// No-op: the enumerator holds no disposable resources.
    /// </summary>
    public void Dispose() { }
}

internal readonly ImmutableDictionary<UniqueAddress, long> Versions;

public MultiVersionVector(params KeyValuePair<UniqueAddress, long>[] nodeVersions)
Expand All @@ -336,7 +379,7 @@ public MultiVersionVector(ImmutableDictionary<UniqueAddress, long> nodeVersions)

public override bool IsEmpty => Versions.IsEmpty;
public override int Count => Versions.Count;
public override IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator => Versions.GetEnumerator();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the second memory offender. Calling ImmutableDictionary's GetEnumerator seems to create a shallow copy of some of its internal data structures, consuming memory every time it is called.

Fix by converting this to a Tuple IEnumerator and our own much simpler IEnumerator implementation.

public override IEnumerator<(UniqueAddress, long)> VersionEnumerator => new Enumerator(Versions);
public override VersionVector Increment(UniqueAddress node) =>
new MultiVersionVector(Versions.SetItem(node, Counter.GetAndIncrement()));

Expand Down
2 changes: 1 addition & 1 deletion src/core/Akka/Actor/Address.cs
Expand Up @@ -40,7 +40,7 @@ public int Compare(Address x, Address y)
if (result != 0) return result;
result = string.CompareOrdinal(x.System, y.System);
if (result != 0) return result;
result = string.CompareOrdinal(x.Host ?? "", y.Host ?? "");
result = string.CompareOrdinal(x.Host ?? string.Empty, y.Host ?? string.Empty);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

if (result != 0) return result;
result = (x.Port ?? 0).CompareTo(y.Port ?? 0);
return result;
Expand Down
6 changes: 3 additions & 3 deletions src/core/Akka/Util/MurmurHash.cs
Expand Up @@ -202,20 +202,20 @@ public static int StringHash(string s)
{
unchecked
{
var sChar = s.ToCharArray();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the biggest offender. MurmurHash is called once for every DataEnvelope that is going to be sent as gossip. For a system that is trying to send thousands of gossip messages, this can add up quite significantly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before

Method Mean Error StdDev Gen 0 Gen 1 Gen 2 Allocated
Murmur_string_hash 60.87 ns 1.240 ns 2.389 ns 0.0229 - - 96 B
Jenkins_string_hash 221.14 ns 0.767 ns 0.680 ns 0.0153 - - 64 B
Murmur_binary_hash 166.72 ns 2.177 ns 2.036 ns - - - -
Jenkins_binary_hash 370.40 ns 0.709 ns 0.554 ns - - - -

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After

BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19041.985 (2004/May2020Update/20H1)
AMD Ryzen 7 1700, 1 CPU, 16 logical and 8 physical cores
.NET SDK=5.0.203
  [Host]     : .NET Core 3.1.15 (CoreCLR 4.700.21.21202, CoreFX 4.700.21.21402), X64 RyuJIT
  DefaultJob : .NET Core 3.1.15 (CoreCLR 4.700.21.21202, CoreFX 4.700.21.21402), X64 RyuJIT

Method Mean Error StdDev Gen 0 Gen 1 Gen 2 Allocated
Murmur_string_hash 40.39 ns 0.082 ns 0.077 ns - - - -
Jenkins_string_hash 218.54 ns 0.467 ns 0.390 ns 0.0153 - - 64 B
Murmur_binary_hash 166.06 ns 2.224 ns 2.080 ns - - - -
Jenkins_binary_hash 372.00 ns 2.132 ns 1.664 ns - - - -

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is definitely a big improvement.

var span = s.AsSpan();
var h = StartHash((uint)s.Length * StringSeed);
var c = HiddenMagicA;
var k = HiddenMagicB;
var j = 0;
while (j + 1 < s.Length)
{
var i = (uint)((sChar[j] << 16) + sChar[j + 1]);
var i = (uint)((span[j] << 16) + span[j + 1]);
h = ExtendHash(h, i, c, k);
c = NextMagicA(c);
k = NextMagicB(k);
j += 2;
}
if (j < s.Length) h = ExtendHash(h, sChar[j], c, k);
if (j < s.Length) h = ExtendHash(h, span[j], c, k);
return (int)FinalizeHash(h);
}
}
Expand Down