-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
server.go
1707 lines (1454 loc) · 55.5 KB
/
server.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package consul
import (
"context"
"errors"
"fmt"
"io"
"io/ioutil"
"net"
"os"
"path/filepath"
"reflect"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/armon/go-metrics"
"github.com/hashicorp/consul-net-rpc/net/rpc"
connlimit "github.com/hashicorp/go-connlimit"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-memdb"
"github.com/hashicorp/go-version"
"github.com/hashicorp/raft"
autopilot "github.com/hashicorp/raft-autopilot"
raftboltdb "github.com/hashicorp/raft-boltdb/v2"
"github.com/hashicorp/serf/serf"
"go.etcd.io/bbolt"
"golang.org/x/time/rate"
"google.golang.org/grpc"
"github.com/hashicorp/consul/acl"
"github.com/hashicorp/consul/agent/consul/authmethod"
"github.com/hashicorp/consul/agent/consul/authmethod/ssoauth"
"github.com/hashicorp/consul/agent/consul/fsm"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/consul/stream"
"github.com/hashicorp/consul/agent/consul/usagemetrics"
"github.com/hashicorp/consul/agent/consul/wanfed"
agentgrpc "github.com/hashicorp/consul/agent/grpc/private"
"github.com/hashicorp/consul/agent/grpc/private/services/subscribe"
"github.com/hashicorp/consul/agent/grpc/public/services/connectca"
"github.com/hashicorp/consul/agent/grpc/public/services/dataplane"
"github.com/hashicorp/consul/agent/grpc/public/services/serverdiscovery"
"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/consul/agent/pool"
"github.com/hashicorp/consul/agent/router"
"github.com/hashicorp/consul/agent/rpc/middleware"
"github.com/hashicorp/consul/agent/rpc/peering"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/routine"
"github.com/hashicorp/consul/logging"
"github.com/hashicorp/consul/proto/pbpeering"
"github.com/hashicorp/consul/proto/pbsubscribe"
"github.com/hashicorp/consul/tlsutil"
"github.com/hashicorp/consul/types"
)
// NOTE The "consul.client.rpc" and "consul.client.rpc.exceeded" counters are defined in consul/client.go
// Consul-level protocol versions that this build is able to _understand_.
// They are also used to configure the Serf protocol versions.
const (
	// DefaultRPCProtocol is the protocol version used by default.
	DefaultRPCProtocol = 2

	// ProtocolVersionMin is the oldest protocol version understood.
	ProtocolVersionMin uint8 = 2

	// ProtocolVersion2Compatible exists because version 3 introduced
	// network coordinates while the default stayed at 2 to ease the
	// transition to that feature. An agent speaking version 2 will still
	// attempt to send its coordinates to a server that understands
	// version 3 or greater.
	ProtocolVersion2Compatible = 2

	// ProtocolVersionMax is the newest protocol version understood.
	ProtocolVersionMax = 3
)
// Storage locations and tuning knobs used while wiring up Serf and Raft.
const (
	// Snapshot paths handed to the LAN and WAN Serf pools
	// (presumably relative to the data directory - confirm in setupSerf).
	serfLANSnapshot = "serf/local.snapshot"
	serfWANSnapshot = "serf/remote.snapshot"

	// raftState is the sub-directory of the data directory that holds
	// the Raft store and snapshots.
	raftState = "raft/"

	// snapshotsRetained is the number of Raft snapshots kept by the
	// file snapshot store.
	snapshotsRetained = 2

	// raftLogCacheSize caps how many log entries are cached in memory,
	// which cuts disk I/O for recently committed entries.
	raftLogCacheSize = 512

	// raftRemoveGracePeriod is the time allowed for a RemovePeer to
	// replicate so that the server leaves the cluster gracefully.
	raftRemoveGracePeriod = 5 * time.Second

	// serfEventChSize is the capacity of the buffered channel receiving
	// Serf events. Once it is full, Serf and Memberlist are blocked.
	serfEventChSize = 2048

	// reconcileChSize is the capacity of the buffered channel carrying
	// reconcile updates from Serf to the Catalog. Once it is full,
	// updates are dropped until the next periodic reconcile.
	reconcileChSize = 256

	// LeaderTransferMinVersion is presumably the minimum Consul version
	// that supports leadership transfer - confirm against its callers.
	LeaderTransferMinVersion = "1.6.0"
)
// Human-readable names identifying the server's background routines.
// (Presumably these are the keys handed to the leader routine manager;
// their call sites are outside this part of the file.)
const (
	// ACL related routines.
	aclPolicyReplicationRoutineName = "ACL policy replication"
	aclRoleReplicationRoutineName   = "ACL role replication"
	aclTokenReplicationRoutineName  = "ACL token replication"
	aclTokenReapingRoutineName      = "acl token reaping"
	aclUpgradeRoutineName           = "legacy ACL token upgrade"

	// Connect CA related routines.
	caRootPruningRoutineName              = "CA root pruning"
	caRootMetricRoutineName               = "CA root expiration metric"
	caSigningMetricRoutineName            = "CA signing expiration metric"
	secondaryCARootWatchRoutineName       = "secondary CA roots watch"
	intermediateCertRenewWatchRoutineName = "intermediate cert renew watch"
	backgroundCAInitializationRoutineName = "CA initialization"

	// Config entry and federation state routines.
	configReplicationRoutineName          = "config entry replication"
	federationStateReplicationRoutineName = "federation state replication"
	federationStateAntiEntropyRoutineName = "federation state anti-entropy"
	federationStatePruningRoutineName     = "federation state pruning"
	intentionMigrationRoutineName         = "intention config entry migration"

	// Miscellaneous routines.
	virtualIPCheckRoutineName = "virtual IP version check"
	peeringStreamsRoutineName = "streaming peering resources"
)
// ErrWANFederationDisabled is the sentinel error reported when WAN
// federation is disabled.
var ErrWANFederationDisabled = fmt.Errorf("WAN Federation is disabled")
// Identifiers for the kinds of pool a server can take part in.
const (
	// PoolKindPartition identifies a partition pool.
	PoolKindPartition = "partition"

	// PoolKindSegment identifies a segment pool.
	PoolKindSegment = "segment"
)
// Server is a Consul server which manages the service discovery,
// health checking, DC forwarding, Raft, and multiple Serf pools.
type Server struct {
// queriesBlocking is a counter that we incr and decr atomically in
// rpc calls to provide telemetry on how many blocking queries are running.
// We interact with queriesBlocking atomically, so do not move it without
// ensuring it stays correctly 64-bit aligned in the struct layout (being
// the first field guarantees this on 32-bit platforms).
queriesBlocking uint64
// aclConfig is the configuration for the ACL system
aclConfig *acl.Config
// ACLResolver is embedded and is used to resolve tokens to effective policies
*ACLResolver
// aclAuthMethodValidators is the authmethod.Cache used by this server.
aclAuthMethodValidators authmethod.Cache
// autopilot is the Autopilot instance for this server.
autopilot *autopilot.Autopilot
// caManager is used to synchronize CA operations across the leader and RPC functions.
caManager *CAManager
// caLeafLimiter is the rate limiter to use when signing leaf certificates
caLeafLimiter connectSignRateLimiter
// config is the Consul configuration
config *Config
// configReplicator is used to manage the leader's replication routines for
// centralized config
configReplicator *Replicator
// federationStateReplicator is used to manage the leader's replication routines for
// federation states
federationStateReplicator *Replicator
// dcSupportsFederationStates is used to determine whether we can
// replicate federation states or not. All servers in the local
// DC must be on a version of Consul supporting federation states
// before this will get enabled.
dcSupportsFederationStates int32
// tokens holds ACL tokens initially from the configuration, but can
// be updated at runtime, so should always be used instead of going to
// the configuration directly.
tokens *token.Store
// connPool is the connection pool to other consul servers
connPool *pool.ConnPool
// grpcConnPool is the connection pool to other consul servers using gRPC
grpcConnPool GRPCClientConner
// eventChLAN is used to receive events from the
// serf cluster in the datacenter
eventChLAN chan serf.Event
// eventChWAN is used to receive events from the
// serf cluster that spans datacenters
eventChWAN chan serf.Event
// wanMembershipNotifyCh is used to receive notifications that the
// serfWAN pool may have changed.
//
// If this is nil, notification is skipped.
wanMembershipNotifyCh chan struct{}
// fsm is the state machine used with Raft to provide
// strong consistency.
fsm *fsm.FSM
// logger is the server's logger; NewServer derives it from the logger
// supplied in Deps. loggers is the logger store built on top of it.
logger hclog.InterceptLogger
loggers *loggerStore
// The raft instance is used among Consul nodes within the DC to protect
// operations that require strong consistency.
// raftStore is only set for the disk-based setup and raftInmem only in
// dev mode (see setupRaft).
raft *raft.Raft
raftLayer *RaftLayer
raftStore *raftboltdb.BoltStore
raftTransport *raft.NetworkTransport
raftInmem *raft.InmemStore
// raftNotifyCh is set up by setupRaft() and ensures that we get reliable leader
// transition notifications from the Raft layer.
raftNotifyCh <-chan bool
// reconcileCh is used to pass events from the serf handler
// into the leader manager, so that the strong state can be
// updated
reconcileCh chan serf.Member
// readyForConsistentReads is used to track when the leader server is
// ready to serve consistent reads, after it has applied its initial
// barrier. This is updated atomically.
readyForConsistentReads int32
// leaveCh is used to signal that the server is leaving the cluster
// and trying to shed its RPC traffic onto other Consul servers. This
// is only ever closed.
leaveCh chan struct{}
// publicConnectCAServer serves the Connect CA service exposed on the public
// gRPC port. It is also exposed on the private multiplexed "server" port to
// enable RPC forwarding.
publicConnectCAServer *connectca.Server
// publicGRPCServer is the gRPC server exposed on the dedicated gRPC port, as
// opposed to the multiplexed "server" port which is served by grpcHandler.
publicGRPCServer *grpc.Server
// router is used to map out Consul servers in the WAN and in Consul
// Enterprise user-defined areas.
router *router.Router
// rpcLimiter is used to rate limit the total number of RPCs initiated
// from an agent. It stores a *rate.Limiter (see NewServer).
rpcLimiter atomic.Value
// rpcConnLimiter limits the number of RPC connections from a single source IP
rpcConnLimiter connlimit.Limiter
// Listener is used to listen for incoming connections
Listener net.Listener
// grpcHandler serves gRPC on the multiplexed "server" port and rpcServer
// is the net/rpc server for regular RPC requests.
grpcHandler connHandler
rpcServer *rpc.Server
// insecureRPCServer is a RPC server that is configured with
// IncomingInsecureRPCConfig to allow clients to call AutoEncrypt.Sign
// to request client certificates. At this point a client doesn't have
// a client cert and thus cannot present it. This is the only RPC
// Endpoint that is available at the time of writing.
insecureRPCServer *rpc.Server
// rpcRecorder is a middleware component that can emit RPC request metrics.
rpcRecorder *middleware.RequestRecorder
// tlsConfigurator holds the agent configuration relevant to TLS and
// configures everything related to it.
tlsConfigurator *tlsutil.Configurator
// serfLAN is the Serf cluster maintained inside the DC
// which contains all the DC nodes
//
// - If Network Segments are active, this only contains members in the
// default segment.
//
// - If Admin Partitions are active, this only contains members in the
// default partition.
//
serfLAN *serf.Serf
// serfWAN is the Serf cluster maintained between DC's
// which SHOULD only consist of Consul servers
serfWAN *serf.Serf
serfWANConfig *serf.Config
// memberlistTransportWAN is the transport taken from the WAN Serf
// memberlist configuration; gatewayLocator is only created when WAN
// federation via mesh gateways is enabled (see NewServer).
memberlistTransportWAN wanfed.IngestionAwareTransport
gatewayLocator *GatewayLocator
// serverLookup tracks server consuls in the local datacenter.
// Used to do leader forwarding and provide fast lookup by server id and address
serverLookup *ServerLookup
// grpcLeaderForwarder is notified on leader change in order to keep the grpc
// resolver up to date.
grpcLeaderForwarder LeaderForwarder
// floodLock controls access to floodCh.
floodLock sync.RWMutex
floodCh []chan struct{}
// sessionTimers track the expiration time of each Session that has
// a TTL. On expiration, a SessionDestroy event will occur, and
// destroy the session via standard session destroy processing
sessionTimers *SessionTimers
// statsFetcher is used by autopilot to check the status of the other
// Consul servers.
statsFetcher *StatsFetcher
// overviewManager is used to periodically update the cluster overview
// and emit node/service/check health metrics.
overviewManager *OverviewManager
// reassertLeaderCh is used to signal the leader loop should re-run
// leadership actions after a snapshot restore.
reassertLeaderCh chan chan error
// tombstoneGC is used to track the pending GC invocations
// for the KV tombstones
tombstoneGC *state.TombstoneGC
// aclReplicationStatus (and its associated lock) provide information
// about the health of the ACL replication goroutine.
aclReplicationStatus structs.ACLReplicationStatus
aclReplicationStatusLock sync.RWMutex
// shutdown and the associated members here are used in orchestrating
// a clean shutdown. The shutdownCh is never written to, only closed to
// indicate a shutdown has been initiated.
shutdown bool
shutdownCh chan struct{}
shutdownLock sync.Mutex
// dcSupportsIntentionsAsConfigEntries is used to determine whether we can
// migrate old intentions into service-intentions config entries. All
// servers in the local DC must be on a version of Consul supporting
// service-intentions before this will get enabled.
dcSupportsIntentionsAsConfigEntries int32
// leaderRoutineManager handles starting/stopping go routines when
// establishing/revoking raft leadership
leaderRoutineManager *routine.Manager
// publisher is the EventPublisher to be shared amongst various server components. Events from
// modifications to the FSM, autopilot and others will flow through here. If in the future we
// need Events generated outside of the Server and all its components, then we could move
// this into the Deps struct and create it much earlier on.
publisher *stream.EventPublisher
// peeringService is a service used to handle peering streams.
peeringService *peering.Service
// embedded struct to hold all the enterprise specific data
EnterpriseServer
}
// connHandler is the behaviour the server needs from the handler that
// serves connections on its behalf (the Server's grpcHandler field has
// this type).
type connHandler interface {
	// Handle takes over the given connection.
	Handle(conn net.Conn)

	// Run starts the handler and reports the error it stopped with, if any.
	Run() error

	// Shutdown stops the handler.
	Shutdown() error
}
// NewServer is used to construct a new Consul server from the configuration
// and extra options, potentially returning an error.
//
// config is validated first (protocol version, data dir, ACL settings).
// flat carries the shared dependencies (logger, token store, connection
// pools, router, TLS configurator, ...) and publicGRPCServer is the gRPC
// server on which the public services are registered.
//
// Once background work has been started, every failure path calls
// s.Shutdown() before returning so that listeners, stores and goroutines
// created up to that point are released. On error the returned *Server is
// always nil.
func NewServer(config *Config, flat Deps, publicGRPCServer *grpc.Server) (*Server, error) {
	logger := flat.Logger
	if err := config.CheckProtocolVersion(); err != nil {
		return nil, err
	}
	if config.DataDir == "" && !config.DevMode {
		return nil, fmt.Errorf("Config must provide a DataDir")
	}
	if err := config.CheckACL(); err != nil {
		return nil, err
	}

	// Create the tombstone GC.
	gc, err := state.NewTombstoneGC(config.TombstoneTTL, config.TombstoneTTLGranularity)
	if err != nil {
		return nil, err
	}

	// Create the shutdown channel - this is closed but never written to.
	shutdownCh := make(chan struct{})

	serverLogger := flat.Logger.NamedIntercept(logging.ConsulServer)
	loggers := newLoggerStore(serverLogger)

	eventPublisher := stream.NewEventPublisher(10 * time.Second)

	fsmDeps := fsm.Deps{
		Logger: flat.Logger,
		NewStateStore: func() *state.Store {
			return state.NewStateStoreWithEventPublisher(gc, eventPublisher)
		},
		Publisher: eventPublisher,
	}

	// Create server.
	s := &Server{
		config:                  config,
		tokens:                  flat.Tokens,
		connPool:                flat.ConnPool,
		grpcConnPool:            flat.GRPCConnPool,
		eventChLAN:              make(chan serf.Event, serfEventChSize),
		eventChWAN:              make(chan serf.Event, serfEventChSize),
		logger:                  serverLogger,
		loggers:                 loggers,
		leaveCh:                 make(chan struct{}),
		reconcileCh:             make(chan serf.Member, reconcileChSize),
		router:                  flat.Router,
		tlsConfigurator:         flat.TLSConfigurator,
		publicGRPCServer:        publicGRPCServer,
		reassertLeaderCh:        make(chan chan error),
		sessionTimers:           NewSessionTimers(),
		tombstoneGC:             gc,
		serverLookup:            NewServerLookup(),
		shutdownCh:              shutdownCh,
		leaderRoutineManager:    routine.NewManager(logger.Named(logging.Leader)),
		aclAuthMethodValidators: authmethod.NewCache(),
		fsm:                     fsm.NewFromDeps(fsmDeps),
		publisher:               eventPublisher,
	}

	// The RPC request recorder is mandatory. Nothing has been started yet,
	// so these failures can return without shutting the server down.
	if flat.NewRequestRecorderFunc == nil {
		return nil, fmt.Errorf("cannot initialize server without an RPC request recorder provider")
	}
	recorder := flat.NewRequestRecorderFunc(serverLogger, s.IsLeader, s.config.Datacenter)
	if recorder == nil {
		return nil, fmt.Errorf("cannot initialize server with a nil RPC request recorder")
	}

	if flat.GetNetRPCInterceptorFunc == nil {
		s.rpcServer = rpc.NewServer()
		s.insecureRPCServer = rpc.NewServer()
	} else {
		s.rpcServer = rpc.NewServerWithOpts(rpc.WithServerServiceCallInterceptor(flat.GetNetRPCInterceptorFunc(recorder)))
		s.insecureRPCServer = rpc.NewServerWithOpts(rpc.WithServerServiceCallInterceptor(flat.GetNetRPCInterceptorFunc(recorder)))
	}

	s.rpcRecorder = recorder

	go s.publisher.Run(&lib.StopChannelContext{StopCh: s.shutdownCh})

	if s.config.ConnectMeshGatewayWANFederationEnabled {
		s.gatewayLocator = NewGatewayLocator(
			s.logger,
			s,
			s.config.Datacenter,
			s.config.PrimaryDatacenter,
		)
		s.connPool.GatewayResolver = s.gatewayLocator.PickGateway
		s.grpcConnPool.SetGatewayResolver(s.gatewayLocator.PickGateway)
	}

	// Initialize enterprise specific server functionality
	if err := s.initEnterprise(flat); err != nil {
		s.Shutdown()
		return nil, err
	}

	initLeaderMetrics()

	s.rpcLimiter.Store(rate.NewLimiter(config.RPCRateLimit, config.RPCMaxBurst))

	configReplicatorConfig := ReplicatorConfig{
		Name:     logging.ConfigEntry,
		Delegate: &FunctionReplicator{ReplicateFn: s.replicateConfig, Name: "config-entries"},
		Rate:     s.config.ConfigReplicationRate,
		Burst:    s.config.ConfigReplicationBurst,
		Logger:   s.logger,
	}
	s.configReplicator, err = NewReplicator(&configReplicatorConfig)
	if err != nil {
		s.Shutdown()
		return nil, err
	}

	federationStateReplicatorConfig := ReplicatorConfig{
		Name: logging.FederationState,
		Delegate: &IndexReplicator{
			Delegate: &FederationStateReplicator{
				srv:            s,
				gatewayLocator: s.gatewayLocator,
			},
			Logger: s.loggers.Named(logging.Replication).Named(logging.FederationState),
		},
		Rate:             s.config.FederationStateReplicationRate,
		Burst:            s.config.FederationStateReplicationBurst,
		Logger:           s.logger,
		SuppressErrorLog: isErrFederationStatesNotSupported,
	}
	s.federationStateReplicator, err = NewReplicator(&federationStateReplicatorConfig)
	if err != nil {
		s.Shutdown()
		return nil, err
	}

	// Initialize the stats fetcher that autopilot will use.
	s.statsFetcher = NewStatsFetcher(logger, s.connPool, s.config.Datacenter)

	partitionInfo := serverPartitionInfo(s)
	s.aclConfig = newACLConfig(partitionInfo, logger)
	aclConfig := ACLResolverConfig{
		Config:      config.ACLResolverSettings,
		Backend:     &serverACLResolverBackend{Server: s},
		CacheConfig: serverACLCacheConfig,
		Logger:      logger,
		ACLConfig:   s.aclConfig,
		Tokens:      flat.Tokens,
	}
	// Initialize the ACL resolver.
	if s.ACLResolver, err = NewACLResolver(&aclConfig); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to create ACL resolver: %v", err)
	}

	// Initialize the RPC layer.
	if err := s.setupRPC(); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start RPC layer: %v", err)
	}

	// Initialize any extra RPC listeners for segments.
	segmentListeners, err := s.setupSegmentRPC()
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start segment RPC layer: %v", err)
	}

	// Initialize the Raft server.
	if err := s.setupRaft(); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start Raft: %v", err)
	}

	s.caManager = NewCAManager(&caDelegateWithState{Server: s}, s.leaderRoutineManager, s.logger.ResetNamed("connect.ca"), s.config)
	if s.config.ConnectEnabled && (s.config.AutoEncryptAllowTLS || s.config.AutoConfigAuthzEnabled) {
		go s.connectCARootsMonitor(&lib.StopChannelContext{StopCh: s.shutdownCh})
	}

	if s.gatewayLocator != nil {
		go s.gatewayLocator.Run(&lib.StopChannelContext{StopCh: s.shutdownCh})
	}

	// Serf and dynamic bind ports
	//
	// The LAN serf cluster announces the port of the WAN serf cluster
	// which creates a race when the WAN cluster is supposed to bind to
	// a dynamic port (port 0). The current memberlist implementation will
	// update the bind port in the configuration after the memberlist is
	// created, so we can pull it out from there reliably, even though it's
	// a little gross to be reading the updated config.

	// Initialize the WAN Serf if enabled
	if config.SerfWANConfig != nil {
		s.serfWAN, s.serfWANConfig, err = s.setupSerf(setupSerfOptions{
			Config:       config.SerfWANConfig,
			EventCh:      s.eventChWAN,
			SnapshotPath: serfWANSnapshot,
			WAN:          true,
			Listener:     s.Listener,
		})
		if err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to start WAN Serf: %v", err)
		}

		// This is always a *memberlist.NetTransport or something which wraps
		// it which satisfies this interface.
		s.memberlistTransportWAN = config.SerfWANConfig.MemberlistConfig.Transport.(wanfed.IngestionAwareTransport)

		// See big comment above why we are doing this.
		// NOTE(review): both reads below happen after setupSerf, so the
		// second one looks redundant; kept as-is to preserve behavior.
		serfBindPortWAN := config.SerfWANConfig.MemberlistConfig.BindPort
		if serfBindPortWAN == 0 {
			serfBindPortWAN = config.SerfWANConfig.MemberlistConfig.BindPort
			if serfBindPortWAN == 0 {
				// RPC, Raft and the WAN Serf are already running at this
				// point, so tear them down like every other failure path.
				s.Shutdown()
				return nil, fmt.Errorf("Failed to get dynamic bind port for WAN Serf")
			}
			s.logger.Info("Serf WAN TCP bound", "port", serfBindPortWAN)
		}
	}

	// Initialize the LAN segments before the default LAN Serf so we have
	// updated port information to publish there.
	if err := s.setupSegments(config, segmentListeners); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to setup network segments: %v", err)
	}

	// Initialize the LAN Serf for the default network segment.
	if err := s.setupSerfLAN(config); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start LAN Serf: %v", err)
	}

	if err := s.router.AddArea(types.AreaLAN, s.serfLAN, s.connPool); err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to add LAN serf route: %w", err)
	}
	go s.lanEventHandler()

	// Start the flooders after the LAN event handler is wired up.
	s.floodSegments(config)

	// Add a "static route" to the WAN Serf and hook it up to Serf events.
	if s.serfWAN != nil {
		if err := s.router.AddArea(types.AreaWAN, s.serfWAN, s.connPool); err != nil {
			s.Shutdown()
			return nil, fmt.Errorf("Failed to add WAN serf route: %v", err)
		}
		go router.HandleSerfEvents(s.logger, s.router, types.AreaWAN, s.serfWAN.ShutdownCh(), s.eventChWAN, s.wanMembershipNotifyCh)

		// Fire up the LAN <-> WAN join flooder.
		addrFn := func(s *metadata.Server) (string, error) {
			if s.WanJoinPort == 0 {
				return "", fmt.Errorf("no wan join port for server: %s", s.Addr.String())
			}
			addr, _, err := net.SplitHostPort(s.Addr.String())
			if err != nil {
				return "", err
			}
			return fmt.Sprintf("%s:%d", addr, s.WanJoinPort), nil
		}
		go s.Flood(addrFn, s.serfWAN)
	}

	// Start enterprise specific functionality
	if err := s.startEnterprise(); err != nil {
		s.Shutdown()
		return nil, err
	}

	reporter, err := usagemetrics.NewUsageMetricsReporter(
		new(usagemetrics.Config).
			WithStateProvider(s.fsm).
			WithLogger(s.logger).
			WithDatacenter(s.config.Datacenter).
			WithReportingInterval(s.config.MetricsReportingInterval).
			WithGetMembersFunc(func() []serf.Member {
				members, err := s.lanPoolAllMembers()
				if err != nil {
					return []serf.Member{}
				}
				return members
			}),
	)
	if err != nil {
		s.Shutdown()
		return nil, fmt.Errorf("Failed to start usage metrics reporter: %v", err)
	}
	go reporter.Run(&lib.StopChannelContext{StopCh: s.shutdownCh})

	s.overviewManager = NewOverviewManager(s.logger, s.fsm, s.config.MetricsReportingInterval)
	go s.overviewManager.Run(&lib.StopChannelContext{StopCh: s.shutdownCh})

	// Initialize public gRPC server - register services on public gRPC server.
	s.publicConnectCAServer = connectca.NewServer(connectca.Config{
		Publisher:   s.publisher,
		GetStore:    func() connectca.StateStore { return s.FSM().State() },
		Logger:      logger.Named("grpc-api.connect-ca"),
		ACLResolver: plainACLResolver{s.ACLResolver},
		CAManager:   s.caManager,
		ForwardRPC: func(info structs.RPCInfo, fn func(*grpc.ClientConn) error) (bool, error) {
			return s.ForwardGRPC(s.grpcConnPool, info, fn)
		},
		ConnectEnabled: s.config.ConnectEnabled,
	})
	s.publicConnectCAServer.Register(s.publicGRPCServer)

	dataplane.NewServer(dataplane.Config{
		GetStore:    func() dataplane.StateStore { return s.FSM().State() },
		Logger:      logger.Named("grpc-api.dataplane"),
		ACLResolver: plainACLResolver{s.ACLResolver},
		Datacenter:  s.config.Datacenter,
	}).Register(s.publicGRPCServer)

	serverdiscovery.NewServer(serverdiscovery.Config{
		Publisher:   s.publisher,
		ACLResolver: plainACLResolver{s.ACLResolver},
		Logger:      logger.Named("grpc-api.server-discovery"),
	}).Register(s.publicGRPCServer)

	// Initialize private gRPC server.
	//
	// Note: some "public" gRPC services are also exposed on the private gRPC server
	// to enable RPC forwarding.
	s.grpcHandler = newGRPCHandlerFromConfig(flat, config, s)
	s.grpcLeaderForwarder = flat.LeaderForwarder
	go s.trackLeaderChanges()

	// Initialize Autopilot. This must happen before starting leadership monitoring
	// as establishing leadership could attempt to use autopilot and cause a panic.
	s.initAutopilot(config)

	// Start monitoring leadership. This must happen after Serf is set up
	// since it can fire events when leadership is obtained.
	go s.monitorLeadership()

	// Start listening for RPC requests.
	go func() {
		if err := s.grpcHandler.Run(); err != nil {
			s.logger.Error("gRPC server failed", "error", err)
		}
	}()
	go s.listen(s.Listener)

	// Start listeners for any segments with separate RPC listeners.
	for _, listener := range segmentListeners {
		go s.listen(listener)
	}

	// start autopilot - this must happen after the RPC listeners get setup
	// or else it may block
	s.autopilot.Start(&lib.StopChannelContext{StopCh: s.shutdownCh})

	// Start the metrics handlers.
	go s.updateMetrics()

	return s, nil
}
// newGRPCHandlerFromConfig creates the peering service, stores it on s, and
// returns the connection handler for the private gRPC server listening on
// config.RPCAddr.
//
// Service registration is deferred to a callback handed to the handler. It
// registers, in order: the subscription service (only when streaming is
// enabled), the peering service, the enterprise services and the public
// Connect CA service. s.publicConnectCAServer is read when the callback runs.
func newGRPCHandlerFromConfig(deps Deps, config *Config, s *Server) connHandler {
	s.peeringService = peering.NewService(
		deps.Logger.Named("grpc-api.peering"),
		NewPeeringBackend(s, deps.GRPCConnPool),
	)

	registerServices := func(grpcSrv *grpc.Server) {
		if config.RPCConfig.EnableStreaming {
			backend := &subscribeBackend{srv: s, connPool: deps.GRPCConnPool}
			subLogger := deps.Logger.Named("grpc-api.subscription")
			pbsubscribe.RegisterStateChangeSubscriptionServer(grpcSrv, subscribe.NewServer(backend, subLogger))
		}

		pbpeering.RegisterPeeringServiceServer(grpcSrv, s.peeringService)
		s.registerEnterpriseGRPCServices(deps, grpcSrv)

		// Note: this public gRPC service is also exposed on the private server to
		// enable RPC forwarding.
		s.publicConnectCAServer.Register(grpcSrv)
	}

	return agentgrpc.NewHandler(deps.Logger, config.RPCAddr, registerServices)
}
// connectCARootsMonitor keeps the TLS configurator's auto-TLS CA list in sync
// with the Connect CA roots in the state store.
//
// Each iteration reads the current CA roots, pushes their root certificates
// to the TLS configurator, and then blocks until either the watched data
// changes (or the state store is abandoned) or ctx is done. The monitor
// returns when ctx is done or when the CA roots cannot be read; a failure to
// update the TLS configurator is only logged and retried on the next change.
func (s *Server) connectCARootsMonitor(ctx context.Context) {
	for {
		ws := memdb.NewWatchSet()
		store := s.fsm.State()
		// Also wake up if the state store is replaced (e.g. snapshot restore).
		ws.Add(store.AbandonCh())

		_, cas, err := store.CARoots(ws)
		if err != nil {
			s.logger.Error("Failed to watch AutoEncrypt CARoot", "error", err)
			return
		}

		caPems := make([]string, 0, len(cas))
		for _, ca := range cas {
			caPems = append(caPems, ca.RootCert)
		}
		if err := s.tlsConfigurator.UpdateAutoTLSCA(caPems); err != nil {
			s.logger.Error("Failed to update AutoEncrypt CAPems", "error", err)
		}

		// WatchCtx returns nil when a watch fires and the context's error
		// once the context is done. Stop on any such error, not only on
		// context.Canceled: a done context makes WatchCtx return
		// immediately, so continuing would spin this loop.
		if err := ws.WatchCtx(ctx); err != nil {
			s.logger.Info("shutting down Connect CA roots monitor")
			return
		}
	}
}
// setupRaft is used to setup and initialize Raft
func (s *Server) setupRaft() error {
// If we have an unclean exit then attempt to close the Raft store.
defer func() {
if s.raft == nil && s.raftStore != nil {
if err := s.raftStore.Close(); err != nil {
s.logger.Error("failed to close Raft store", "error", err)
}
}
}()
var serverAddressProvider raft.ServerAddressProvider = nil
if s.config.RaftConfig.ProtocolVersion >= 3 { // ServerAddressProvider needs server ids to work correctly, which is only supported in protocol version 3 or higher
serverAddressProvider = s.serverLookup
}
// Create a transport layer.
transConfig := &raft.NetworkTransportConfig{
Stream: s.raftLayer,
MaxPool: 3,
Timeout: 10 * time.Second,
ServerAddressProvider: serverAddressProvider,
Logger: s.loggers.Named(logging.Raft),
}
trans := raft.NewNetworkTransportWithConfig(transConfig)
s.raftTransport = trans
s.config.RaftConfig.Logger = s.loggers.Named(logging.Raft)
// Versions of the Raft protocol below 3 require the LocalID to match the network
// address of the transport.
s.config.RaftConfig.LocalID = raft.ServerID(trans.LocalAddr())
if s.config.RaftConfig.ProtocolVersion >= 3 {
s.config.RaftConfig.LocalID = raft.ServerID(s.config.NodeID)
}
// Build an all in-memory setup for dev mode, otherwise prepare a full
// disk-based setup.
var log raft.LogStore
var stable raft.StableStore
var snap raft.SnapshotStore
if s.config.DevMode {
store := raft.NewInmemStore()
s.raftInmem = store
stable = store
log = store
snap = raft.NewInmemSnapshotStore()
} else {
// Create the base raft path.
path := filepath.Join(s.config.DataDir, raftState)
if err := lib.EnsurePath(path, true); err != nil {
return err
}
// Create the backend raft store for logs and stable storage.
store, err := raftboltdb.New(raftboltdb.Options{
BoltOptions: &bbolt.Options{
NoFreelistSync: s.config.RaftBoltDBConfig.NoFreelistSync,
},
Path: filepath.Join(path, "raft.db"),
})
if err != nil {
return err
}
s.raftStore = store
stable = store
// start publishing boltdb metrics
go store.RunMetrics(&lib.StopChannelContext{StopCh: s.shutdownCh}, 0)
// Wrap the store in a LogCache to improve performance.
cacheStore, err := raft.NewLogCache(raftLogCacheSize, store)
if err != nil {
return err
}
log = cacheStore
// Create the snapshot store.
snapshots, err := raft.NewFileSnapshotStoreWithLogger(path, snapshotsRetained, s.logger.Named("snapshot"))
if err != nil {
return err
}
snap = snapshots
// For an existing cluster being upgraded to the new version of
// Raft, we almost never want to run recovery based on the old
// peers.json file. We create a peers.info file with a helpful
// note about where peers.json went, and use that as a sentinel
// to avoid ingesting the old one that first time (if we have to
// create the peers.info file because it's not there, we also
// blow away any existing peers.json file).
peersFile := filepath.Join(path, "peers.json")
peersInfoFile := filepath.Join(path, "peers.info")
if _, err := os.Stat(peersInfoFile); os.IsNotExist(err) {
if err := ioutil.WriteFile(peersInfoFile, []byte(peersInfoContent), 0755); err != nil {
return fmt.Errorf("failed to write peers.info file: %v", err)
}
// Blow away the peers.json file if present, since the
// peers.info sentinel wasn't there.
if _, err := os.Stat(peersFile); err == nil {
if err := os.Remove(peersFile); err != nil {
return fmt.Errorf("failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
}
s.logger.Info("deleted peers.json file (see peers.info for details)")
}
} else if _, err := os.Stat(peersFile); err == nil {
s.logger.Info("found peers.json file, recovering Raft configuration...")
var configuration raft.Configuration
if s.config.RaftConfig.ProtocolVersion < 3 {
configuration, err = raft.ReadPeersJSON(peersFile)
} else {
configuration, err = raft.ReadConfigJSON(peersFile)
}
if err != nil {
return fmt.Errorf("recovery failed to parse peers.json: %v", err)
}
tmpFsm := fsm.NewFromDeps(fsm.Deps{
Logger: s.logger,
NewStateStore: func() *state.Store {
return state.NewStateStore(s.tombstoneGC)
},
})
if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
log, stable, snap, trans, configuration); err != nil {
return fmt.Errorf("recovery failed: %v", err)
}
if err := os.Remove(peersFile); err != nil {
return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
}
s.logger.Info("deleted peers.json file after successful recovery")
}
}
// If we are in bootstrap or dev mode and the state is clean then we can
// bootstrap now.
if (s.config.Bootstrap || s.config.DevMode) && !s.config.ReadReplica {
hasState, err := raft.HasExistingState(log, stable, snap)
if err != nil {
return err
}
if !hasState {
configuration := raft.Configuration{
Servers: []raft.Server{
{
ID: s.config.RaftConfig.LocalID,
Address: trans.LocalAddr(),
},
},
}
if err := raft.BootstrapCluster(s.config.RaftConfig,
log, stable, snap, trans, configuration); err != nil {
return err
}
}
}
// Set up a channel for reliable leader notifications.
raftNotifyCh := make(chan bool, 10)
s.config.RaftConfig.NotifyCh = raftNotifyCh
s.raftNotifyCh = raftNotifyCh
// Setup the Raft store.
var err error
s.raft, err = raft.NewRaft(s.config.RaftConfig, s.fsm.ChunkingFSM(), log, stable, snap, trans)
return err
}
// factory is a function that returns an RPC endpoint bound to the given
// server. The returned value is what setupRPC hands to the server's
// rpcServer.Register, so it is typed as interface{} rather than a concrete
// endpoint type.
type factory func(s *Server) interface{}
// endpoints is the package-level list of registered RPC endpoint factories.
// registerEndpoint appends to it, and setupRPC calls every entry with the
// Server being set up and registers the result on that server's rpcServer.
var endpoints []factory
// registerEndpoint registers a new RPC endpoint factory by appending fn to
// the package-level endpoints slice. Factories are kept in registration
// order and are not de-duplicated.
//
// The append is not guarded by a lock here, so this is presumably meant to
// be called during package initialization, before any Server runs setupRPC
// (TODO confirm against callers).
func registerEndpoint(fn factory) {
endpoints = append(endpoints, fn)
}
// setupRPC is used to setup the RPC listener
func (s *Server) setupRPC() error {
s.rpcConnLimiter.SetConfig(connlimit.Config{
MaxConnsPerClientIP: s.config.RPCMaxConnsPerClient,
})
for _, fn := range endpoints {
s.rpcServer.Register(fn(s))
}
// Only register AutoEncrypt on the insecure RPC server. Insecure only
// means that verify incoming is turned off even though it might have
// been configured.
s.insecureRPCServer.Register(&AutoEncrypt{srv: s})
// Setup the AutoConfig JWT Authorizer
var authz AutoConfigAuthorizer
if s.config.AutoConfigAuthzEnabled {
// create the auto config authorizer from the JWT authmethod
validator, err := ssoauth.NewValidator(s.logger, &s.config.AutoConfigAuthzAuthMethod)
if err != nil {
return fmt.Errorf("Failed to initialize JWT Auto Config Authorizer: %w", err)
}
authz = &jwtAuthorizer{
validator: validator,
allowReuse: s.config.AutoConfigAuthzAllowReuse,
claimAssertions: s.config.AutoConfigAuthzClaimAssertions,
}
} else {
// This authorizer always returns that the endpoint is disabled
authz = &disabledAuthorizer{}
}
// now register with the insecure RPC server