New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve MurmurHash string hash memory footprint #5028
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,13 +52,13 @@ public static VersionVector Create(ImmutableDictionary<UniqueAddress, long> vers | |
/// <summary> | ||
/// Marker to signal that we have reached the end of a version vector. | ||
/// </summary> | ||
private static readonly KeyValuePair<UniqueAddress, long> EndMarker = new KeyValuePair<UniqueAddress, long>(null, long.MinValue); | ||
private static readonly (UniqueAddress, long) EndMarker = (null, long.MinValue); | ||
|
||
public abstract bool IsEmpty { get; } | ||
|
||
public abstract int Count { get; } | ||
|
||
public abstract IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator { get; } | ||
public abstract IEnumerator<(UniqueAddress, long)> VersionEnumerator { get; } | ||
public static readonly VersionVector Empty = new MultiVersionVector(ImmutableDictionary<UniqueAddress, long>.Empty); | ||
|
||
/// <summary> | ||
|
@@ -163,11 +163,11 @@ private Ordering CompareOnlyTo(VersionVector other, Ordering order) | |
private T NextOrElse<T>(IEnumerator<T> enumerator, T defaultValue) => | ||
enumerator.MoveNext() ? enumerator.Current : defaultValue; | ||
|
||
private Ordering Compare(IEnumerator<KeyValuePair<UniqueAddress, long>> i1, | ||
IEnumerator<KeyValuePair<UniqueAddress, long>> i2, Ordering requestedOrder) | ||
private Ordering Compare(IEnumerator<(UniqueAddress, long)> i1, | ||
IEnumerator<(UniqueAddress, long)> i2, Ordering requestedOrder) | ||
{ | ||
var nt1 = NextOrElse(i1, EndMarker); | ||
var nt2 = NextOrElse(i2, EndMarker); | ||
var nt1 = NextOrElse<(UniqueAddress Key, long Value)>(i1, EndMarker); | ||
var nt2 = NextOrElse<(UniqueAddress Key, long Value)>(i2, EndMarker); | ||
var currentOrder = Ordering.Same; | ||
while (true) | ||
{ | ||
|
@@ -214,13 +214,13 @@ private Ordering CompareOnlyTo(VersionVector other, Ordering order) | |
[DebuggerDisplay("VersionVector({Node}->{Version})")] | ||
public sealed class SingleVersionVector : VersionVector | ||
{ | ||
private sealed class Enumerator : IEnumerator<KeyValuePair<UniqueAddress, long>> | ||
private sealed class Enumerator : IEnumerator<(UniqueAddress, long)> | ||
{ | ||
private bool _moved = false; | ||
|
||
public Enumerator(UniqueAddress node, long version) | ||
{ | ||
Current = new KeyValuePair<UniqueAddress, long>(node, version); | ||
Current = (node, version); | ||
} | ||
|
||
/// <inheritdoc/> | ||
|
@@ -241,7 +241,7 @@ public void Reset() | |
_moved = false; | ||
} | ||
|
||
public KeyValuePair<UniqueAddress, long> Current { get; } | ||
public (UniqueAddress, long) Current { get; } | ||
|
||
object IEnumerator.Current => Current; | ||
} | ||
|
@@ -257,7 +257,7 @@ public SingleVersionVector(UniqueAddress node, long version) | |
|
||
public override bool IsEmpty => false; | ||
public override int Count => 1; | ||
public override IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator => new Enumerator(Node, Version); | ||
public override IEnumerator<(UniqueAddress, long)> VersionEnumerator => new Enumerator(Node, Version); | ||
public override VersionVector Increment(UniqueAddress node) | ||
{ | ||
var v = Counter.GetAndIncrement(); | ||
|
@@ -317,6 +317,49 @@ public override int GetHashCode() | |
[Serializable] | ||
public sealed class MultiVersionVector : VersionVector | ||
{ | ||
internal class Enumerator : IEnumerator<(UniqueAddress, long)> | ||
{ | ||
private readonly (UniqueAddress, long)[] _backing; | ||
private readonly int _maxIndex; | ||
private int _currentIndex = -1; | ||
|
||
public Enumerator(ImmutableDictionary<UniqueAddress, long> versions) | ||
{ | ||
_backing = new (UniqueAddress, long)[versions.Count]; | ||
var index = 0; | ||
foreach (var kvp in versions) | ||
{ | ||
_backing[index] = (kvp.Key, kvp.Value); | ||
index++; | ||
} | ||
|
||
_maxIndex = _backing.Length - 1; | ||
} | ||
|
||
public bool MoveNext() | ||
{ | ||
_currentIndex++; | ||
if (_currentIndex > _maxIndex) | ||
{ | ||
_currentIndex = _maxIndex; | ||
return false; | ||
} | ||
return true; | ||
} | ||
|
||
public void Reset() | ||
{ | ||
_currentIndex = 0; | ||
} | ||
|
||
public (UniqueAddress, long) Current | ||
=> _currentIndex == -1 ? (null, 0) : _backing[_currentIndex]; | ||
|
||
object IEnumerator.Current => Current; | ||
|
||
public void Dispose() { } | ||
} | ||
|
||
internal readonly ImmutableDictionary<UniqueAddress, long> Versions; | ||
|
||
public MultiVersionVector(params KeyValuePair<UniqueAddress, long>[] nodeVersions) | ||
|
@@ -336,7 +379,7 @@ public MultiVersionVector(ImmutableDictionary<UniqueAddress, long> nodeVersions) | |
|
||
public override bool IsEmpty => Versions.IsEmpty; | ||
public override int Count => Versions.Count; | ||
public override IEnumerator<KeyValuePair<UniqueAddress, long>> VersionEnumerator => Versions.GetEnumerator(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the second memory offender. Calling ImmutableDictionary GetEnumerator seemed to create a shallow copy of some of its internal data structure, consuming memory everytime it is called. Fix by converting this to a Tuple IEnumerator and our own much simpler IEnumerator implementation. |
||
public override IEnumerator<(UniqueAddress, long)> VersionEnumerator => new Enumerator(Versions); | ||
public override VersionVector Increment(UniqueAddress node) => | ||
new MultiVersionVector(Versions.SetItem(node, Counter.GetAndIncrement())); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,7 +40,7 @@ public int Compare(Address x, Address y) | |
if (result != 0) return result; | ||
result = string.CompareOrdinal(x.System, y.System); | ||
if (result != 0) return result; | ||
result = string.CompareOrdinal(x.Host ?? "", y.Host ?? ""); | ||
result = string.CompareOrdinal(x.Host ?? string.Empty, y.Host ?? string.Empty); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LGTM |
||
if (result != 0) return result; | ||
result = (x.Port ?? 0).CompareTo(y.Port ?? 0); | ||
return result; | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -202,20 +202,20 @@ public static int StringHash(string s) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
unchecked | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var sChar = s.ToCharArray(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the biggest offending code. MurmurHash is called once for every DataEnvelope that are going to be sent as a gossip. For a system that are trying to gossip thousands of gossips, this can add up quite significantly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19041.985 (2004/May2020Update/20H1)
AMD Ryzen 7 1700, 1 CPU, 16 logical and 8 physical cores
.NET SDK=5.0.203
[Host] : .NET Core 3.1.15 (CoreCLR 4.700.21.21202, CoreFX 4.700.21.21402), X64 RyuJIT
DefaultJob : .NET Core 3.1.15 (CoreCLR 4.700.21.21202, CoreFX 4.700.21.21402), X64 RyuJIT
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is definitely a big improvement. |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var span = s.AsSpan(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var h = StartHash((uint)s.Length * StringSeed); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var c = HiddenMagicA; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var k = HiddenMagicB; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var j = 0; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
while (j + 1 < s.Length) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
{ | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var i = (uint)((sChar[j] << 16) + sChar[j + 1]); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
var i = (uint)((span[j] << 16) + span[j + 1]); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
h = ExtendHash(h, i, c, k); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
c = NextMagicA(c); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
k = NextMagicB(k); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
j += 2; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (j < s.Length) h = ExtendHash(h, sChar[j], c, k); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (j < s.Length) h = ExtendHash(h, span[j], c, k); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
return (int)FinalizeHash(h); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this a breaking API change or is this a subtype of an
internal
type above?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, both of these are breaking changes.
I thought about doing this in #4990 - but I decided to avoid the breaking API change by exposing an internal API that should be used for this.
I don't think the risks are that high of this breaking change having a significant negative impact as DData is either primarily used directly or inside of cluster sharding and these APIs aren't called directly... but it's a judgement call.