######################################
# Akka Cluster Reference Config File #
######################################
# This is the reference config file that contains all the default settings.
# Make your edits/overrides in your application.conf.
akka {
cluster {
# Initial contact points of the cluster.
# The nodes to join automatically at startup.
# Comma-separated full URIs defined by a string of the form
# "akka.tcp://system@hostname:port"
# Leave as empty if the node is supposed to be joined manually.
seed-nodes = []
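# As an illustration only (system name, hosts and ports below are placeholders
# to replace with your own):
# seed-nodes = [
#   "akka.tcp://ClusterSystem@10.0.0.1:4053",
#   "akka.tcp://ClusterSystem@10.0.0.2:4053"]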
# How long to wait for one of the seed nodes to reply to initial join request.
# When this is the first seed node and there is no positive reply from the other
# seed nodes within this timeout it will join itself to bootstrap the cluster.
# When this is not the first seed node the join attempts will be performed with
# this interval.
seed-node-timeout = 5s
# If a join request fails it will be retried after this period.
# Disable join retry by specifying "off".
retry-unsuccessful-join-after = 10s
# The joining of given seed nodes will by default be retried indefinitely until
# a successful join. That process can be aborted if unsuccessful by defining this
# timeout. When aborted it will run CoordinatedShutdown, which by default will
# terminate the ActorSystem. CoordinatedShutdown can also be configured to exit
# the JVM. It is useful to define this timeout if the seed-nodes are assembled
# dynamically and a restart with new seed-nodes should be tried after unsuccessful
# attempts.
shutdown-after-unsuccessful-join-seed-nodes = off
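# For example, to give up and run CoordinatedShutdown after roughly 20 seconds
# of failed join attempts (illustrative value only):
# shutdown-after-unsuccessful-join-seed-nodes = 20s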
# Should the 'leader' in the cluster be allowed to automatically mark
# unreachable nodes as DOWN after a configured time of unreachability?
# Using auto-down implies that two separate clusters will automatically be
# formed in case of network partition.
# Disable with "off" or specify a duration to enable auto-down.
# If a downing-provider-class is configured this setting is ignored.
auto-down-unreachable-after = off
# Time margin after which shards or singletons that belonged to a downed/removed
# partition are created in the surviving partition. The purpose of this margin is that
# in case of a network partition the persistent actors in the non-surviving partitions
# must be stopped before corresponding persistent actors are started somewhere else.
# This is useful if you implement downing strategies that handle network partitions,
# e.g. by keeping the larger side of the partition and shutting down the smaller side.
# It will not add any extra safety for auto-down-unreachable-after, since that is not
# handling network partitions.
# Disable with "off" or specify a duration to enable.
down-removal-margin = off
# Pluggable support for downing of nodes in the cluster.
# If this setting is left empty the behaviour will depend on 'auto-down-unreachable-after' in the following ways:
# * if it is 'off' the `NoDowning` provider is used and no automatic downing will be performed
# * if it is set to a duration the `AutoDowning` provider is used with the configured downing duration
#
# If specified the value must be the fully qualified class name of a subclass of
# `akka.cluster.DowningProvider` having a two-argument constructor:
# - argument 1: accepting an `ActorSystem`
# - argument 2: accepting an `Akka.Cluster.Cluster`
downing-provider-class = ""
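# For example, to plug in the split brain resolver shipped with Akka.Cluster
# (described in the akka.cluster.split-brain-resolver section below):
# downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider"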
# If this is set to "off", the leader will not move 'Joining' members to 'Up' during a network
# split. This feature allows the leader to accept 'Joining' members as 'WeaklyUp'
# so they become part of the cluster even during a network split. The leader will
# move `Joining` members to 'WeaklyUp' after this configured duration without convergence.
# The leader will move 'WeaklyUp' members to 'Up' status once convergence has been reached.
allow-weakly-up-members = 7s
# The roles of this member. List of strings, e.g. roles = ["A", "B"].
# The roles are part of the membership information and can be used by
# routers or other services to distribute work to certain member types,
# e.g. front-end and back-end nodes.
roles = []
# Application version of the deployment. Used by rolling update features
# to distinguish between old and new nodes. The typical convention is to use
# 3 digit version numbers `major.minor.patch`, but 1 or 2 digits are also
# supported.
#
# If no `.` is used it is interpreted as a single digit version number or as
# plain alphanumeric if it couldn't be parsed as a number.
#
# It may also have a qualifier at the end for 2 or 3 digit version numbers such
# as "1.2-RC1".
# For 1 digit with qualifier, 1-RC1, it is interpreted as plain alphanumeric.
#
# It has support for https://github.com/dwijnand/sbt-dynver format with `+` or
# `-` separator. The number of commits from the tag is handled as a numeric part.
# For example `1.0.0+3-73475dce26` is less than `1.0.10+10-ed316bd024` (3 < 10).
#
# DEFAULT: the app-version defaults to the entry assembly's version,
# i.e. the assembly of the executable running `Program.cs`.
#
# Values can be "assembly-version" or a version string as defined above, i.e.
# app-version = "1.0.0"
# app-version = "1.1-beta1"
# app-version = "1"
# app-version = "1.1"
app-version = assembly-version
# Run the coordinated shutdown from phase 'cluster-shutdown' when the cluster
# is shutdown for other reasons than when leaving, e.g. when downing. This
# will terminate the ActorSystem when the cluster extension is shutdown.
run-coordinated-shutdown-when-down = on
role {
# Minimum required number of members of a certain role before the leader
# changes member status of 'Joining' members to 'Up'. Typically used together
# with 'Cluster.registerOnMemberUp' to defer some action, such as starting
# actors, until the cluster has reached a certain size.
# E.g. to require 2 nodes with role 'frontend' and 3 nodes with role 'backend':
# frontend.min-nr-of-members = 2
# backend.min-nr-of-members = 3
#<role-name>.min-nr-of-members = 1
}
# Minimum required number of members before the leader changes member status
# of 'Joining' members to 'Up'. Typically used together with
# 'Cluster.registerOnMemberUp' to defer some action, such as starting actors,
# until the cluster has reached a certain size.
min-nr-of-members = 1
# Enable/disable info level logging of cluster events
log-info = on
# Enable/disable verbose info-level logging of cluster events
# for temporary troubleshooting. Defaults to 'off'.
log-info-verbose = off
# how long should the node wait before starting the periodic
# maintenance tasks?
periodic-tasks-initial-delay = 1s
# how often should the node send out gossip information?
gossip-interval = 1s
# discard incoming gossip messages if not handled within this duration
gossip-time-to-live = 2s
# how often should the leader perform maintenance tasks?
leader-actions-interval = 1s
# how often should the node move nodes, marked as unreachable by the failure
# detector, out of the membership ring?
unreachable-nodes-reaper-interval = 1s
# How often the current internal stats should be published.
# A value of 0s can be used to always publish the stats, when it happens.
# Disable with "off".
publish-stats-interval = off
# The id of the dispatcher to use for cluster actors.
# If not specified, the internal dispatcher is used.
# If specified you need to define the settings of the actual dispatcher.
use-dispatcher = ""
# Gossip to random node with newer or older state information, if any with
# this probability. Otherwise Gossip to any random live node.
# Probability value is between 0.0 and 1.0. 0.0 means never, 1.0 means always.
gossip-different-view-probability = 0.8
# Reduce the above probability when the number of nodes in the cluster is
# greater than this value.
reduce-gossip-different-view-probability = 400
# Settings for the Phi accrual failure detector (http://ddg.jaist.ac.jp/pub/HDY+04.pdf
# [Hayashibara et al]) used by the cluster subsystem to detect unreachable
# members.
failure-detector {
# FQCN of the failure detector implementation.
# It must implement akka.remote.FailureDetector and have
# a public constructor with a com.typesafe.config.Config and
# akka.actor.EventStream parameter.
implementation-class = "Akka.Remote.PhiAccrualFailureDetector, Akka.Remote"
# How often keep-alive heartbeat messages should be sent to each connection.
heartbeat-interval = 1 s
# Defines the failure detector threshold.
# A low threshold is prone to generate many wrong suspicions but ensures
# a quick detection in the event of a real crash. Conversely, a high
# threshold generates fewer mistakes but needs more time to detect
# actual crashes.
threshold = 8.0
# Number of the samples of inter-heartbeat arrival times to adaptively
# calculate the failure timeout for connections.
max-sample-size = 1000
# Minimum standard deviation to use for the normal distribution in
# AccrualFailureDetector. Too low standard deviation might result in
# too much sensitivity for sudden, but normal, deviations in heartbeat
# inter arrival times.
min-std-deviation = 100 ms
# Number of potentially lost/delayed heartbeats that will be
# accepted before considering it to be an anomaly.
# This margin is important to be able to survive sudden, occasional,
# pauses in heartbeat arrivals, due to for example garbage collect or
# network drop.
acceptable-heartbeat-pause = 3 s
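# Illustrative tuning sketch only: on systems that suffer from long garbage
# collection pauses it is usually better to relax this margin than to lower
# the threshold, e.g.
# acceptable-heartbeat-pause = 6 s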
# Number of member nodes that each member will send heartbeat messages to,
# i.e. each node will be monitored by this number of other nodes.
monitored-by-nr-of-members = 9
# After the heartbeat request has been sent the first failure detection
# will start after this period, even though no heartbeat message has
# been received.
expected-response-after = 1 s
}
# If the tick-duration of the default scheduler is longer than the
# tick-duration configured here a dedicated scheduler will be used for
# periodic tasks of the cluster, otherwise the default scheduler is used.
# See akka.scheduler settings for more details.
scheduler {
tick-duration = 33ms
ticks-per-wheel = 512
}
debug {
# log heartbeat events (very verbose, useful mostly when debugging heartbeating issues)
verbose-heartbeat-logging = off
# log gossip merge events (very verbose, useful when debugging convergence issues)
verbose-receive-gossip-logging = off
}
}
# Default configuration for routers
actor.deployment.default {
# MetricsSelector to use
# - available: "mix", "heap", "cpu", "load"
# - or: Fully qualified class name of the MetricsSelector class.
# The class must extend akka.cluster.routing.MetricsSelector
# and have a public constructor with com.typesafe.config.Config
# parameter.
# - default is "mix"
metrics-selector = mix
}
actor.deployment.default.cluster {
# enable cluster aware router that deploys to nodes in the cluster
enabled = off
# Maximum number of routees that will be deployed on each cluster
# member node.
# Note that max-total-nr-of-instances defines the total number of routees, but
# the number of routees per node will not be exceeded, i.e. if you
# define max-total-nr-of-instances = 50 and max-nr-of-instances-per-node = 2
# it will deploy 2 routees per new member in the cluster, up to
# 25 members.
max-nr-of-instances-per-node = 1
# Maximum number of routees that will be deployed, in total
# on all nodes. See also description of max-nr-of-instances-per-node.
# For backwards compatibility reasons, nr-of-instances
# has the same purpose as max-total-nr-of-instances for cluster
# aware routers and nr-of-instances (if defined by user) takes
# precedence over max-total-nr-of-instances.
max-total-nr-of-instances = 10000
# Defines if routees are allowed to be located on the same node as
# the head router actor, or only on remote nodes.
# Useful for master-worker scenario where all routees are remote.
allow-local-routees = on
# Use members with specified role, or all members if undefined or empty.
use-role = ""
}
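# Illustrative sketch of a cluster-aware pool router defined in application.conf;
# the router path, router type and values below are examples only:
# akka.actor.deployment {
#   /workerRouter {
#     router = round-robin-pool
#     cluster {
#       enabled = on
#       max-nr-of-instances-per-node = 2
#       allow-local-routees = off
#       use-role = "backend"
#     }
#   }
# }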
# Protobuf serializer for cluster messages
actor {
serializers {
akka-cluster = "Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster"
}
serialization-bindings {
"Akka.Cluster.IClusterMessage, Akka.Cluster" = akka-cluster
"Akka.Cluster.Routing.ClusterRouterPool, Akka.Cluster" = akka-cluster
}
serialization-identifiers {
"Akka.Cluster.Serialization.ClusterMessageSerializer, Akka.Cluster" = 5
}
}
}
# split-brain-resolver
# To enable the split brain resolver you first need to enable the provider in your application.conf:
# for old split brain resolver:
# akka.cluster.downing-provider-class = "Akka.Cluster.SplitBrainResolver"
# for new split brain resolver:
# akka.cluster.downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider"
akka.cluster.split-brain-resolver {
# Select one of the available strategies (see descriptions below):
# static-quorum, keep-majority, keep-oldest, down-all, lease-majority, (keep-referee)
# keep-referee - supported only with the old split brain resolver
active-strategy = keep-majority
# Decision is taken by the strategy when there has been no membership or
# reachability changes for this duration, i.e. the cluster state is stable.
stable-after = 20s
# When reachability observations by the failure detector are changed the SBR decisions
# are deferred until there are no changes within the 'stable-after' duration.
# If this continues for too long it might be an indication of an unstable system/network
# and it could result in delayed or conflicting decisions on separate sides of a network
# partition.
# As a precaution for that scenario all nodes are downed if no decision is made within
# `stable-after + down-all-when-unstable` from the first unreachability event.
# The measurement is reset if all unreachable have been healed, downed or removed, or
# if there are no changes within `stable-after * 2`.
# The value can be on, off, or a duration.
# By default it is 'on' and then it is derived to be 3/4 of stable-after, but not less than
# 4 seconds.
# supported only with the new split brain resolver
down-all-when-unstable = on
}
# Down the unreachable nodes if the number of remaining nodes is greater than or equal to
# the given 'quorum-size'. Otherwise down the reachable nodes, i.e. it will shut down that
# side of the partition. In other words, the 'quorum-size' defines the minimum number of nodes
# that the cluster must have to be operational. If there are unreachable nodes when starting
# up the cluster, before reaching this limit, the cluster may shut itself down immediately.
# This is not an issue if you start all nodes at approximately the same time.
#
# Note that you must not add more members to the cluster than 'quorum-size * 2 - 1', because
# then both sides may down each other and thereby form two separate clusters. For example,
# quorum-size configured to 3 in a 6 node cluster may result in a split where each side
# consists of 3 nodes each, i.e. each side thinks it has enough nodes to continue by
# itself. A warning is logged if this recommendation is violated.
akka.cluster.split-brain-resolver.static-quorum {
# minimum number of nodes that the cluster must have
quorum-size = undefined
# if the 'role' is defined the decision is based only on members with that 'role'
role = ""
}
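# Illustrative example (in application.conf) of enabling the new split brain
# resolver with static-quorum for a cluster that is never scaled above 5 nodes;
# the quorum size of 3 is an example value only:
# akka.cluster {
#   downing-provider-class = "Akka.Cluster.SBR.SplitBrainResolverProvider"
#   split-brain-resolver {
#     active-strategy = static-quorum
#     static-quorum.quorum-size = 3
#   }
# }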
# Down the unreachable nodes if the current node is in the majority part based on the last known
# membership information. Otherwise down the reachable nodes, i.e. the own part. If
# the parts are of equal size the part containing the node with the lowest address is kept.
# Note that if there are more than two partitions and none is in majority each part
# will shut itself down, terminating the whole cluster.
akka.cluster.split-brain-resolver.keep-majority {
# if the 'role' is defined the decision is based only on members with that 'role'
role = ""
}
# Down the part that does not contain the oldest member (current singleton).
#
# There is one exception to this rule if 'down-if-alone' is defined to 'on'.
# Then, if the oldest node has partitioned from all other nodes the oldest
# will down itself and keep all other nodes running. The strategy will not
# down the single oldest node when it is the only remaining node in the cluster.
#
# Note that if the oldest node crashes the others will remove it from the cluster
# when 'down-if-alone' is 'on', otherwise they will down themselves if the
# oldest node crashes, i.e. shut down the whole cluster together with the oldest node.
akka.cluster.split-brain-resolver.keep-oldest {
# Enable downing of the oldest node when it is partitioned from all other nodes
down-if-alone = on
# if the 'role' is defined the decision is based only on members with that 'role',
# i.e. using the oldest member (singleton) within the nodes with that role
role = ""
}
# Keep the part that can acquire the lease, and down the other part.
# Best effort is to keep the side that has the most nodes, i.e. the majority side.
# This is achieved by adding a delay before trying to acquire the lease on the
# minority side.
# supported only with the new split brain resolver
akka.cluster.split-brain-resolver.lease-majority {
lease-implementation = ""
# The recommended format for the lease name is "<service-name>-akka-sbr".
# When lease-name is not defined, the name will be set to "<actor-system-name>-akka-sbr"
lease-name = ""
# This delay is used on the minority side before trying to acquire the lease,
# as a best effort to try to keep the majority side.
acquire-lease-delay-for-minority = 2s
# Release the lease after this duration.
release-after = 40s
# If the 'role' is defined the majority/minority is based only on members with that 'role'.
role = ""
}
# supported only with the old split brain resolver
akka.cluster.split-brain-resolver.keep-referee {
# referee address in the form "akka.tcp://system@hostname:port"
address = ""
down-all-if-less-than-nodes = 1
}