forked from akkadotnet/akka.net
/
AutoDown.cs
308 lines (273 loc) · 10.1 KB
/
AutoDown.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
//-----------------------------------------------------------------------
// <copyright file="AutoDown.cs" company="Akka.NET Project">
// Copyright (C) 2009-2021 Lightbend Inc. <http://www.lightbend.com>
// Copyright (C) 2013-2021 .NET Foundation <https://github.com/akkadotnet/akka.net>
// </copyright>
//-----------------------------------------------------------------------
using System;
using System.Collections.Immutable;
using Akka.Actor;
using Akka.Event;
using Akka.Configuration;
using static Akka.Cluster.MembershipState;
namespace Akka.Cluster
{
/// <summary>
/// INTERNAL API
///
/// Downs a member that stays unreachable for the configured duration, provided
/// this actor is running on the leader node of the cluster.
///
/// The logic itself lives in <see cref="AutoDownBase"/>; this class only supplies
/// the cluster-backed pieces, so the logic can be unit tested without running
/// a cluster.
/// </summary>
internal sealed class AutoDown : AutoDownBase
{
    private readonly Cluster _cluster;

    /// <summary>
    /// Creates the actor for the given cluster.
    /// </summary>
    /// <param name="autoDownUnreachableAfter">How long a member may stay unreachable before it is downed.</param>
    /// <param name="cluster">The cluster that provides the address, the scheduler, event subscriptions and the down operation.</param>
    public AutoDown(TimeSpan autoDownUnreachableAfter, Cluster cluster)
        : base(autoDownUnreachableAfter)
    {
        _cluster = cluster;
    }

    /// <summary>
    /// Builds the actor configuration used to start an <see cref="AutoDown"/> actor.
    /// </summary>
    /// <param name="autoDownUnreachableAfter">How long a member may stay unreachable before it is downed.</param>
    /// <param name="cluster">The cluster the new actor will operate on.</param>
    /// <returns>A props instance that creates an <see cref="AutoDown"/> actor with the given arguments.</returns>
    public static Props Props(TimeSpan autoDownUnreachableAfter, Cluster cluster)
        => Actor.Props.Create(() => new AutoDown(autoDownUnreachableAfter, cluster));

    /// <summary>
    /// Message the actor schedules to itself; it arrives once the unreachable
    /// duration for <see cref="Node"/> has elapsed.
    /// Two instances are equal when their nodes are equal.
    /// </summary>
    public sealed class UnreachableTimeout
    {
        /// <summary>
        /// Creates a timeout message for the given node.
        /// </summary>
        /// <param name="node">The node the timeout refers to.</param>
        public UnreachableTimeout(UniqueAddress node)
        {
            Node = node;
        }

        /// <summary>
        /// The node the timeout refers to.
        /// </summary>
        public UniqueAddress Node { get; }

        // Typed comparison shared by the object-based overload below.
        private bool Equals(UnreachableTimeout other) => Equals(Node, other.Node);

        /// <inheritdoc/>
        public override bool Equals(object obj)
        {
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // A null or foreign-typed argument fails the type test and yields false.
            return obj is UnreachableTimeout other && Equals(other);
        }

        /// <inheritdoc/>
        public override int GetHashCode()
        {
            if (Node != null)
            {
                return Node.GetHashCode();
            }

            return 0;
        }
    }

    /// <summary>
    /// The address of this node, as reported by the cluster.
    /// </summary>
    public override Address SelfAddress => _cluster.SelfAddress;

    /// <summary>
    /// The scheduler of the cluster, used for the unreachable timeouts.
    /// </summary>
    public override IScheduler Scheduler => _cluster.Scheduler;

    /// <summary>
    /// Subscribes this actor to cluster domain events, then runs the base start logic.
    /// </summary>
    protected override void PreStart()
    {
        var eventTypes = new[] { typeof(ClusterEvent.IClusterDomainEvent) };
        _cluster.Subscribe(Self, eventTypes);
        base.PreStart();
    }

    /// <summary>
    /// Removes the cluster subscription, then runs the base stop logic
    /// (which cancels the outstanding timeouts).
    /// </summary>
    protected override void PostStop()
    {
        _cluster.Unsubscribe(Self);
        base.PostStop();
    }

    /// <summary>
    /// Logs the decision and asks the cluster to down <paramref name="node"/>.
    /// </summary>
    /// <param name="node">The address of the node to down.</param>
    /// <exception cref="InvalidOperationException">
    /// This exception is thrown when a non-leader tries to down the specified <paramref name="node"/>.
    /// </exception>
    public override void Down(Address node)
    {
        if (!_leader)
        {
            throw new InvalidOperationException("Must be leader to down node");
        }

        _cluster.LogInfo("Leader is auto-downing unreachable node [{0}]", node);
        _cluster.Down(node);
    }
}
/// <summary>
/// Cluster-independent core of the auto-downing logic.
///
/// Keeps one scheduled timeout per unreachable member. When a timeout fires the
/// member is downed if this node is the leader; otherwise it is remembered as
/// pending and downed as soon as this node becomes the leader.
/// </summary>
internal abstract class AutoDownBase : UntypedActor
{
    // Members in one of these statuses never get a timeout scheduled.
    private readonly ImmutableHashSet<MemberStatus> _skipMemberStatus =
        MembershipState.ConvergenceSkipUnreachableWithMemberStatus;

    // Timeouts that are still running, keyed by the unreachable node.
    private ImmutableDictionary<UniqueAddress, ICancelable> _scheduledUnreachable =
        ImmutableDictionary<UniqueAddress, ICancelable>.Empty;

    // Nodes whose timeout elapsed while this node was not the leader.
    private ImmutableHashSet<UniqueAddress> _pendingUnreachable =
        ImmutableHashSet<UniqueAddress>.Empty;

    /// <summary>
    /// Whether this node is the leader, according to the most recent
    /// cluster state snapshot or leader-changed event that was received.
    /// </summary>
    protected bool _leader = false;

    readonly TimeSpan _autoDownUnreachableAfter;

    /// <summary>
    /// Initializes the base with the unreachable duration.
    /// </summary>
    /// <param name="autoDownUnreachableAfter">
    /// How long a member may stay unreachable before it is downed;
    /// <see cref="TimeSpan.Zero"/> means no timeout is scheduled and the member is handled immediately.
    /// </param>
    protected AutoDownBase(TimeSpan autoDownUnreachableAfter)
    {
        _autoDownUnreachableAfter = autoDownUnreachableAfter;
    }

    /// <summary>
    /// Cancels every timeout that is still scheduled.
    /// </summary>
    protected override void PostStop()
    {
        foreach (var cancelable in _scheduledUnreachable.Values)
        {
            cancelable.Cancel();
        }
    }

    /// <summary>
    /// The address of this node; compared against the reported leader address.
    /// </summary>
    public abstract Address SelfAddress { get; }

    /// <summary>
    /// The scheduler used to deliver the unreachable timeouts.
    /// </summary>
    public abstract IScheduler Scheduler { get; }

    /// <summary>
    /// Downs the given node.
    /// </summary>
    /// <param name="node">The address of the node to down.</param>
    public abstract void Down(Address node);

    /// <summary>
    /// Dispatches cluster events and timeout messages to the matching handler.
    /// Messages of any other type are ignored.
    /// </summary>
    /// <param name="message">The received message.</param>
    protected override void OnReceive(object message)
    {
        if (message is ClusterEvent.CurrentClusterState snapshot)
        {
            OnCurrentClusterState(snapshot);
        }
        else if (message is ClusterEvent.UnreachableMember becameUnreachable)
        {
            UnreachableMember(becameUnreachable.Member);
        }
        else if (message is ClusterEvent.ReachableMember becameReachable)
        {
            Remove(becameReachable.Member.UniqueAddress);
        }
        else if (message is ClusterEvent.MemberRemoved removed)
        {
            Remove(removed.Member.UniqueAddress);
        }
        else if (message is ClusterEvent.LeaderChanged leaderChanged)
        {
            OnLeaderChanged(leaderChanged);
        }
        else if (message is AutoDown.UnreachableTimeout timeout)
        {
            OnUnreachableTimeout(timeout);
        }
    }

    // Takes the leader flag from the snapshot and starts tracking its unreachable members.
    private void OnCurrentClusterState(ClusterEvent.CurrentClusterState snapshot)
    {
        var leader = snapshot.Leader;
        _leader = leader != null && leader.Equals(SelfAddress);

        foreach (var member in snapshot.Unreachable)
        {
            UnreachableMember(member);
        }
    }

    // Updates the leader flag; on becoming leader, downs everything that was pending.
    private void OnLeaderChanged(ClusterEvent.LeaderChanged change)
    {
        var leader = change.Leader;
        _leader = leader != null && leader.Equals(SelfAddress);

        if (!_leader)
        {
            return;
        }

        foreach (var pending in _pendingUnreachable)
        {
            Down(pending.Address);
        }

        _pendingUnreachable = ImmutableHashSet<UniqueAddress>.Empty;
    }

    // Acts on a timeout only if the node is still tracked; a timeout for a node
    // that was removed in the meantime is dropped.
    private void OnUnreachableTimeout(AutoDown.UnreachableTimeout timeout)
    {
        var timedOut = timeout.Node;
        if (!_scheduledUnreachable.ContainsKey(timedOut))
        {
            return;
        }

        _scheduledUnreachable = _scheduledUnreachable.Remove(timedOut);
        DownOrAddPending(timedOut);
    }

    // Starts tracking a member unless its status is skipped or it is already scheduled.
    private void UnreachableMember(Member m)
    {
        if (_skipMemberStatus.Contains(m.Status) || _scheduledUnreachable.ContainsKey(m.UniqueAddress))
        {
            return;
        }

        ScheduleUnreachable(m.UniqueAddress);
    }

    // Schedules the timeout message to self, or handles the node right away
    // when the configured duration is zero.
    private void ScheduleUnreachable(UniqueAddress node)
    {
        if (_autoDownUnreachableAfter != TimeSpan.Zero)
        {
            var timeoutMessage = new AutoDown.UnreachableTimeout(node);
            var cancelable = Scheduler.ScheduleTellOnceCancelable(_autoDownUnreachableAfter, Self, timeoutMessage, Self);
            _scheduledUnreachable = _scheduledUnreachable.Add(node, cancelable);
        }
        else
        {
            DownOrAddPending(node);
        }
    }

    // Downs the node when this node is the leader, otherwise parks it as pending.
    private void DownOrAddPending(UniqueAddress node)
    {
        if (_leader)
        {
            Down(node.Address);
            return;
        }

        // The current leader is expected to down the node; it is kept here so that
        // this node can take over if the leader crashes and this node becomes leader.
        _pendingUnreachable = _pendingUnreachable.Add(node);
    }

    // Stops tracking a node: cancels its timeout (if any) and forgets it in both collections.
    private void Remove(UniqueAddress node)
    {
        if (_scheduledUnreachable.TryGetValue(node, out var scheduled))
        {
            scheduled.Cancel();
        }

        _scheduledUnreachable = _scheduledUnreachable.Remove(node);
        _pendingUnreachable = _pendingUnreachable.Remove(node);
    }

    /// <summary>
    /// Logging adapter.
    /// NOTE(review): nothing in this class assigns or reads this property - confirm whether it is still needed.
    /// </summary>
    public ILoggingAdapter Log { get; private set; }
}
/// <summary>
/// Used when no custom provider is configured and 'auto-down-unreachable-after' is enabled.
/// Supplies the props of the <see cref="AutoDown"/> actor.
/// </summary>
public sealed class AutoDowning : IDowningProvider
{
    private readonly ActorSystem _system;
    private readonly Cluster _cluster;

    /// <summary>
    /// Creates the provider.
    /// </summary>
    /// <param name="system">The actor system; stored but not otherwise used by this class.</param>
    /// <param name="cluster">The cluster whose settings are read.</param>
    public AutoDowning(ActorSystem system, Cluster cluster)
    {
        _system = system;
        _cluster = cluster;
    }

    /// <summary>
    /// The down removal margin, taken from the cluster settings.
    /// </summary>
    public TimeSpan DownRemovalMargin
    {
        get { return _cluster.Settings.DownRemovalMargin; }
    }

    /// <summary>
    /// The props of the <see cref="AutoDown"/> actor, configured with the
    /// unreachable duration from the cluster settings.
    /// </summary>
    /// <exception cref="ConfigurationException">
    /// This exception is thrown when the <c>akka.cluster.auto-down-unreachable-after</c> configuration setting is not set.
    /// </exception>
    public Props DowningActorProps
    {
        get
        {
            var configuredTimeout = _cluster.Settings.AutoDownUnreachableAfter;
            if (configuredTimeout.HasValue)
            {
                return AutoDown.Props(configuredTimeout.Value, _cluster);
            }

            throw new ConfigurationException("AutoDowning downing provider selected but 'akka.cluster.auto-down-unreachable-after' not set");
        }
    }
}
}