Skip to content

Commit

Permalink
Finish up cluster peering failover (#14396)
Browse files Browse the repository at this point in the history
  • Loading branch information
erichaberkorn committed Aug 30, 2022
1 parent 70bb6a2 commit 3726a0a
Show file tree
Hide file tree
Showing 33 changed files with 1,297 additions and 183 deletions.
3 changes: 3 additions & 0 deletions .changelog/14396.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:feature
peering: Add support for failing over to services running on cluster peers.
```
34 changes: 23 additions & 11 deletions agent/proxycfg/connect_proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,16 +280,6 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s
}
snap.Roots = roots

case strings.HasPrefix(u.CorrelationID, peerTrustBundleIDPrefix):
resp, ok := u.Result.(*pbpeering.TrustBundleReadResponse)
if !ok {
return fmt.Errorf("invalid type for response: %T", u.Result)
}
peer := strings.TrimPrefix(u.CorrelationID, peerTrustBundleIDPrefix)
if resp.Bundle != nil {
snap.ConnectProxy.UpstreamPeerTrustBundles.Set(peer, resp.Bundle)
}

case u.CorrelationID == peeringTrustBundlesWatchID:
resp, ok := u.Result.(*pbpeering.TrustBundleListByServiceResponse)
if !ok {
Expand Down Expand Up @@ -369,6 +359,17 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s
// Clean up data
//

peeredChainTargets := make(map[UpstreamID]struct{})
for _, discoChain := range snap.ConnectProxy.DiscoveryChain {
for _, target := range discoChain.Targets {
if target.Peer == "" {
continue
}
uid := NewUpstreamIDFromTargetID(target.ID)
peeredChainTargets[uid] = struct{}{}
}
}

validPeerNames := make(map[string]struct{})

// Iterate through all known endpoints and remove references to upstream IDs that weren't in the update
Expand All @@ -383,6 +384,11 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s
validPeerNames[uid.Peer] = struct{}{}
return true
}
// Peered upstream came from a discovery chain target
if _, ok := peeredChainTargets[uid]; ok {
validPeerNames[uid.Peer] = struct{}{}
return true
}
snap.ConnectProxy.PeerUpstreamEndpoints.CancelWatch(uid)
return true
})
Expand Down Expand Up @@ -463,8 +469,14 @@ func (s *handlerConnectProxy) handleUpdate(ctx context.Context, u UpdateEvent, s
continue
}
if _, ok := seenUpstreams[uid]; !ok {
for _, cancelFn := range targets {
for targetID, cancelFn := range targets {
cancelFn()

targetUID := NewUpstreamIDFromTargetID(targetID)
if targetUID.Peer != "" {
snap.ConnectProxy.PeerUpstreamEndpoints.CancelWatch(targetUID)
snap.ConnectProxy.UpstreamPeerTrustBundles.CancelWatch(targetUID.Peer)
}
}
delete(snap.ConnectProxy.WatchedUpstreams, uid)
}
Expand Down
11 changes: 11 additions & 0 deletions agent/proxycfg/ingress_gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"fmt"

cachetype "github.com/hashicorp/consul/agent/cache-types"
"github.com/hashicorp/consul/agent/proxycfg/internal/watch"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/proto/pbpeering"
)

type handlerIngressGateway struct {
Expand Down Expand Up @@ -66,6 +68,9 @@ func (s *handlerIngressGateway) initialize(ctx context.Context) (ConfigSnapshot,
snap.IngressGateway.WatchedGateways = make(map[UpstreamID]map[string]context.CancelFunc)
snap.IngressGateway.WatchedGatewayEndpoints = make(map[UpstreamID]map[string]structs.CheckServiceNodes)
snap.IngressGateway.Listeners = make(map[IngressListenerKey]structs.IngressListener)
snap.IngressGateway.UpstreamPeerTrustBundles = watch.NewMap[string, *pbpeering.PeeringTrustBundle]()
snap.IngressGateway.PeerUpstreamEndpoints = watch.NewMap[UpstreamID, structs.CheckServiceNodes]()
snap.IngressGateway.PeerUpstreamEndpointsUseHostnames = make(map[UpstreamID]struct{})
return snap, nil
}

Expand Down Expand Up @@ -152,6 +157,12 @@ func (s *handlerIngressGateway) handleUpdate(ctx context.Context, u UpdateEvent,
delete(snap.IngressGateway.WatchedUpstreams[uid], targetID)
delete(snap.IngressGateway.WatchedUpstreamEndpoints[uid], targetID)
cancelUpstreamFn()

targetUID := NewUpstreamIDFromTargetID(targetID)
if targetUID.Peer != "" {
snap.IngressGateway.PeerUpstreamEndpoints.CancelWatch(targetUID)
snap.IngressGateway.UpstreamPeerTrustBundles.CancelWatch(targetUID.Peer)
}
}

cancelFn()
Expand Down
12 changes: 12 additions & 0 deletions agent/proxycfg/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,18 @@ func (s *ConfigSnapshot) MeshConfigTLSOutgoing() *structs.MeshDirectionalTLSConf
return mesh.TLS.Outgoing
}

// ToConfigSnapshotUpstreams returns a pointer to the ConfigSnapshotUpstreams
// field of the kind-specific section of the snapshot: s.ConnectProxy for
// connect proxies and s.IngressGateway for ingress gateways. The returned
// pointer refers to data inside s rather than a copy. For any other kind it
// returns nil and an error naming that kind.
func (s *ConfigSnapshot) ToConfigSnapshotUpstreams() (*ConfigSnapshotUpstreams, error) {
	switch s.Kind {
	case structs.ServiceKindConnectProxy:
		return &s.ConnectProxy.ConfigSnapshotUpstreams, nil
	case structs.ServiceKindIngressGateway:
		return &s.IngressGateway.ConfigSnapshotUpstreams, nil
	default:
		// This is a coherence check and should never fail.
		// The message is lowercase (Go error-string convention) and names the
		// value actually printed: the service kind.
		return nil, fmt.Errorf("no upstream snapshot for service kind %q", s.Kind)
	}
}

func (u *ConfigSnapshotUpstreams) UpstreamPeerMeta(uid UpstreamID) structs.PeeringServiceMeta {
nodes, _ := u.PeerUpstreamEndpoints.Get(uid)
if len(nodes) == 0 {
Expand Down
62 changes: 52 additions & 10 deletions agent/proxycfg/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,11 @@ func TestState_WatchesAndUpdates(t *testing.T) {
Mode: structs.MeshGatewayModeNone,
},
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-failover-to-peer",
LocalBindPort: 10007,
},
structs.Upstream{
DestinationType: structs.UpstreamDestTypeService,
DestinationName: "api-dc2",
Expand Down Expand Up @@ -552,6 +557,16 @@ func TestState_WatchesAndUpdates(t *testing.T) {
Mode: structs.MeshGatewayModeNone,
},
}),
fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-failover-to-peer",
EvaluateInDatacenter: "dc1",
EvaluateInNamespace: "default",
EvaluateInPartition: "default",
Datacenter: "dc1",
OverrideMeshGateway: structs.MeshGatewayConfig{
Mode: meshGatewayProxyConfigValue,
},
}),
fmt.Sprintf("discovery-chain:%s-dc2", apiUID.String()): genVerifyDiscoveryChainWatch(&structs.DiscoveryChainRequest{
Name: "api-dc2",
EvaluateInDatacenter: "dc1",
Expand Down Expand Up @@ -639,22 +654,45 @@ func TestState_WatchesAndUpdates(t *testing.T) {
},
Err: nil,
},
{
CorrelationID: fmt.Sprintf("discovery-chain:%s-failover-to-peer", apiUID.String()),
Result: &structs.DiscoveryChainResponse{
Chain: discoverychain.TestCompileConfigEntries(t, "api-failover-to-peer", "default", "default", "dc1", "trustdomain.consul",
func(req *discoverychain.CompileRequest) {
req.OverrideMeshGateway.Mode = meshGatewayProxyConfigValue
}, &structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "api-failover-to-peer",
Failover: map[string]structs.ServiceResolverFailover{
"*": {
Targets: []structs.ServiceResolverFailoverTarget{
{Peer: "cluster-01"},
},
},
},
}),
},
Err: nil,
},
},
verifySnapshot: func(t testing.TB, snap *ConfigSnapshot) {
require.True(t, snap.Valid())
require.True(t, snap.MeshGateway.isEmpty())
require.Equal(t, indexedRoots, snap.Roots)

require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 5, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 5, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 5, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 5, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 5, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 6, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 6, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 6, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 6, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 6, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)

require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)

require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())

require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
require.True(t, snap.ConnectProxy.MeshConfigSet)
Expand All @@ -667,6 +705,7 @@ func TestState_WatchesAndUpdates(t *testing.T) {
fmt.Sprintf("upstream-target:api-failover-remote.default.default.dc2:%s-failover-remote?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-remote", "", "dc2", true),
fmt.Sprintf("upstream-target:api-failover-local.default.default.dc2:%s-failover-local?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-local", "", "dc2", true),
fmt.Sprintf("upstream-target:api-failover-direct.default.default.dc2:%s-failover-direct?dc=dc2", apiUID.String()): genVerifyServiceSpecificRequest("api-failover-direct", "", "dc2", true),
upstreamPeerWatchIDPrefix + fmt.Sprintf("%s-failover-to-peer?peer=cluster-01", apiUID.String()): genVerifyServiceSpecificPeeredRequest("api-failover-to-peer", "", "", "cluster-01", true),
fmt.Sprintf("mesh-gateway:dc2:%s-failover-remote?dc=dc2", apiUID.String()): genVerifyGatewayWatch("dc2"),
fmt.Sprintf("mesh-gateway:dc1:%s-failover-local?dc=dc2", apiUID.String()): genVerifyGatewayWatch("dc1"),
},
Expand All @@ -676,15 +715,18 @@ func TestState_WatchesAndUpdates(t *testing.T) {
require.Equal(t, indexedRoots, snap.Roots)

require.Equal(t, issuedCert, snap.ConnectProxy.Leaf)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 5, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 5, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 5, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 5, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 5, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)
require.Len(t, snap.ConnectProxy.DiscoveryChain, 6, "%+v", snap.ConnectProxy.DiscoveryChain)
require.Len(t, snap.ConnectProxy.WatchedUpstreams, 6, "%+v", snap.ConnectProxy.WatchedUpstreams)
require.Len(t, snap.ConnectProxy.WatchedUpstreamEndpoints, 6, "%+v", snap.ConnectProxy.WatchedUpstreamEndpoints)
require.Len(t, snap.ConnectProxy.WatchedGateways, 6, "%+v", snap.ConnectProxy.WatchedGateways)
require.Len(t, snap.ConnectProxy.WatchedGatewayEndpoints, 6, "%+v", snap.ConnectProxy.WatchedGatewayEndpoints)

require.Len(t, snap.ConnectProxy.WatchedServiceChecks, 0, "%+v", snap.ConnectProxy.WatchedServiceChecks)
require.Len(t, snap.ConnectProxy.PreparedQueryEndpoints, 0, "%+v", snap.ConnectProxy.PreparedQueryEndpoints)

require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.PeerUpstreamEndpoints.Len())
require.Equal(t, 1, snap.ConnectProxy.ConfigSnapshotUpstreams.UpstreamPeerTrustBundles.Len())

require.True(t, snap.ConnectProxy.IntentionsSet)
require.Equal(t, ixnMatch, snap.ConnectProxy.Intentions)
},
Expand Down
25 changes: 25 additions & 0 deletions agent/proxycfg/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,31 @@ func TestUpstreamNodesDC2(t testing.T) structs.CheckServiceNodes {
}
}

// TestUpstreamNodesPeerCluster01 returns a two-entry CheckServiceNodes fixture
// whose nodes ("test1" and "test2") carry the peer name "cluster-01". Both
// entries point at the same service value, which is obtained from
// structs.TestNodeServiceWithNameInPeer for the name "web" and that peer.
func TestUpstreamNodesPeerCluster01(t testing.T) structs.CheckServiceNodes {
	const peerName = "cluster-01"

	// One service value is created up front and shared by both entries.
	svc := structs.TestNodeServiceWithNameInPeer(t, "web", peerName)

	first := structs.CheckServiceNode{
		Node: &structs.Node{
			ID:       "test1",
			Node:     "test1",
			Address:  "10.40.1.1",
			PeerName: peerName,
		},
		Service: svc,
	}
	second := structs.CheckServiceNode{
		Node: &structs.Node{
			ID:       "test2",
			Node:     "test2",
			Address:  "10.40.1.2",
			PeerName: peerName,
		},
		Service: svc,
	}

	return structs.CheckServiceNodes{first, second}
}

func TestUpstreamNodesInStatusDC2(t testing.T, status string) structs.CheckServiceNodes {
return structs.CheckServiceNodes{
structs.CheckServiceNode{
Expand Down
34 changes: 34 additions & 0 deletions agent/proxycfg/testing_upstreams.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/hashicorp/consul/agent/connect"
"github.com/hashicorp/consul/agent/consul/discoverychain"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/proto/pbpeering"
)

func setupTestVariationConfigEntriesAndSnapshot(
Expand Down Expand Up @@ -72,6 +73,24 @@ func setupTestVariationConfigEntriesAndSnapshot(
Nodes: TestGatewayNodesDC2(t),
},
})
case "failover-to-cluster-peer":
events = append(events, UpdateEvent{
CorrelationID: "peer-trust-bundle:cluster-01",
Result: &pbpeering.TrustBundleReadResponse{
Bundle: &pbpeering.PeeringTrustBundle{
PeerName: "peer1",
TrustDomain: "peer1.domain",
ExportedPartition: "peer1ap",
RootPEMs: []string{"peer1-root-1"},
},
},
})
events = append(events, UpdateEvent{
CorrelationID: "upstream-peer:db?peer=cluster-01",
Result: &structs.IndexedCheckServiceNodes{
Nodes: TestUpstreamNodesPeerCluster01(t),
},
})
case "failover-through-double-remote-gateway-triggered":
events = append(events, UpdateEvent{
CorrelationID: "upstream-target:db.default.default.dc1:" + dbUID.String(),
Expand Down Expand Up @@ -255,6 +274,21 @@ func setupTestVariationDiscoveryChain(
},
},
)
case "failover-to-cluster-peer":
entries = append(entries,
&structs.ServiceResolverConfigEntry{
Kind: structs.ServiceResolver,
Name: "db",
ConnectTimeout: 33 * time.Second,
Failover: map[string]structs.ServiceResolverFailover{
"*": {
Targets: []structs.ServiceResolverFailoverTarget{
{Peer: "cluster-01"},
},
},
},
},
)
case "failover-through-double-remote-gateway-triggered":
fallthrough
case "failover-through-double-remote-gateway":
Expand Down

0 comments on commit 3726a0a

Please sign in to comment.