Skip to content

Commit

Permalink
logging: Implement dial timeout for net writer (fix #4083) (#4172)
Browse files Browse the repository at this point in the history
* logging: Implement dial timeout for net writer (fix #4083)

* Limit how often redials are attempted

This should cause dial blocking to occur only once every 10 seconds at most, but it also means the logger connection might be down for up to 10 seconds after it comes back online; oh well. We shouldn't block for DialTimeout at every single log emission.

* Clarify offline behavior
  • Loading branch information
mholt committed May 19, 2021
1 parent c052162 commit e3c369d
Showing 1 changed file with 61 additions and 14 deletions.
75 changes: 61 additions & 14 deletions modules/logging/netwriter.go
Expand Up @@ -18,7 +18,9 @@ import (
"fmt"
"io"
"net"
"os"
"sync"
"time"

"github.com/caddyserver/caddy/v2"
"github.com/caddyserver/caddy/v2/caddyconfig/caddyfile"
Expand All @@ -28,10 +30,16 @@ func init() {
caddy.RegisterModule(NetWriter{})
}

// NetWriter implements a log writer that outputs to a network socket.
// NetWriter implements a log writer that outputs to a network socket. If
// the socket goes down, it will dump logs to stderr while it attempts to
// reconnect.
type NetWriter struct {
// The address of the network socket to which to connect.
Address string `json:"address,omitempty"`

// The timeout to wait while connecting to the socket.
DialTimeout caddy.Duration `json:"dial_timeout,omitempty"`

addr caddy.NetworkAddress
}

Expand Down Expand Up @@ -60,6 +68,10 @@ func (nw *NetWriter) Provision(ctx caddy.Context) error {
return fmt.Errorf("multiple ports not supported")
}

if nw.DialTimeout < 0 {
return fmt.Errorf("timeout cannot be less than 0")
}

return nil
}

Expand All @@ -74,7 +86,10 @@ func (nw NetWriter) WriterKey() string {

// OpenWriter opens a new network connection.
func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {
reconn := &redialerConn{nw: nw}
reconn := &redialerConn{
nw: nw,
timeout: time.Duration(nw.DialTimeout),
}
conn, err := reconn.dial()
if err != nil {
return nil, err
Expand All @@ -87,7 +102,9 @@ func (nw NetWriter) OpenWriter() (io.WriteCloser, error) {

// UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax:
//
// net <address>
// net <address> {
// dial_timeout <duration>
// }
//
func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
for d.Next() {
Expand All @@ -98,6 +115,22 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
if d.NextArg() {
return d.ArgErr()
}
for nesting := d.Nesting(); d.NextBlock(nesting); {
switch d.Val() {
case "dial_timeout":
if !d.NextArg() {
return d.ArgErr()
}
timeout, err := caddy.ParseDuration(d.Val())
if err != nil {
return d.Errf("invalid duration: %s", d.Val())
}
if d.NextArg() {
return d.ArgErr()
}
nw.DialTimeout = caddy.Duration(timeout)
}
}
}
return nil
}
Expand All @@ -107,8 +140,10 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
// is retried.
type redialerConn struct {
net.Conn
connMu sync.RWMutex
nw NetWriter
connMu sync.RWMutex
nw NetWriter
timeout time.Duration
lastRedial time.Time
}

// Write wraps the underlying Conn.Write method, but if that fails,
Expand All @@ -131,20 +166,32 @@ func (reconn *redialerConn) Write(b []byte) (n int, err error) {
return
}

// we're the lucky first goroutine to re-dial the connection
conn2, err2 := reconn.dial()
if err2 != nil {
return
}
if n, err = conn2.Write(b); err == nil {
reconn.Conn.Close()
reconn.Conn = conn2
// there's still a problem, so try to re-attempt dialing the socket
// if some time has passed in which the issue could have potentially
// been resolved - we don't want to block at every single log
// emission (!) - see discussion in #4111
if time.Since(reconn.lastRedial) > 10*time.Second {
reconn.lastRedial = time.Now()
conn2, err2 := reconn.dial()
if err2 != nil {
// logger socket still offline; instead of discarding the log, dump it to stderr
os.Stderr.Write(b)
return
}
if n, err = conn2.Write(b); err == nil {
reconn.Conn.Close()
reconn.Conn = conn2
}
} else {
// last redial attempt was too recent; just dump to stderr for now
os.Stderr.Write(b)
}

return
}

func (reconn *redialerConn) dial() (net.Conn, error) {
return net.Dial(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0))
return net.DialTimeout(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0), reconn.timeout)
}

// Interface guards
Expand Down

0 comments on commit e3c369d

Please sign in to comment.