diff --git a/modules/logging/netwriter.go b/modules/logging/netwriter.go index a25ede71b66..2bf23bf04e2 100644 --- a/modules/logging/netwriter.go +++ b/modules/logging/netwriter.go @@ -18,7 +18,9 @@ import ( "fmt" "io" "net" + "os" "sync" + "time" "github.com/caddyserver/caddy/v2" "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" @@ -28,10 +30,16 @@ func init() { caddy.RegisterModule(NetWriter{}) } -// NetWriter implements a log writer that outputs to a network socket. +// NetWriter implements a log writer that outputs to a network socket. If +// the socket goes down, it will dump logs to stderr while it attempts to +// reconnect. type NetWriter struct { + // The address of the network socket to which to connect. Address string `json:"address,omitempty"` + // The timeout to wait while connecting to the socket. + DialTimeout caddy.Duration `json:"dial_timeout,omitempty"` + addr caddy.NetworkAddress } @@ -60,6 +68,10 @@ func (nw *NetWriter) Provision(ctx caddy.Context) error { return fmt.Errorf("multiple ports not supported") } + if nw.DialTimeout < 0 { + return fmt.Errorf("timeout cannot be less than 0") + } + return nil } @@ -74,7 +86,10 @@ func (nw NetWriter) WriterKey() string { // OpenWriter opens a new network connection. func (nw NetWriter) OpenWriter() (io.WriteCloser, error) { - reconn := &redialerConn{nw: nw} + reconn := &redialerConn{ + nw: nw, + timeout: time.Duration(nw.DialTimeout), + } conn, err := reconn.dial() if err != nil { return nil, err @@ -87,7 +102,9 @@ func (nw NetWriter) OpenWriter() (io.WriteCloser, error) { // UnmarshalCaddyfile sets up the handler from Caddyfile tokens. Syntax: // -// net
+// net
{ +// dial_timeout +// } // func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { for d.Next() { @@ -98,6 +115,22 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { if d.NextArg() { return d.ArgErr() } + for nesting := d.Nesting(); d.NextBlock(nesting); { + switch d.Val() { + case "dial_timeout": + if !d.NextArg() { + return d.ArgErr() + } + timeout, err := caddy.ParseDuration(d.Val()) + if err != nil { + return d.Errf("invalid duration: %s", d.Val()) + } + if d.NextArg() { + return d.ArgErr() + } + nw.DialTimeout = caddy.Duration(timeout) + } + } } return nil } @@ -107,8 +140,10 @@ func (nw *NetWriter) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { // is retried. type redialerConn struct { net.Conn - connMu sync.RWMutex - nw NetWriter + connMu sync.RWMutex + nw NetWriter + timeout time.Duration + lastRedial time.Time } // Write wraps the underlying Conn.Write method, but if that fails, @@ -131,20 +166,32 @@ func (reconn *redialerConn) Write(b []byte) (n int, err error) { return } - // we're the lucky first goroutine to re-dial the connection - conn2, err2 := reconn.dial() - if err2 != nil { - return - } - if n, err = conn2.Write(b); err == nil { - reconn.Conn.Close() - reconn.Conn = conn2 + // there's still a problem, so try to re-attempt dialing the socket + // if some time has passed in which the issue could have potentially + // been resolved - we don't want to block at every single log + // emission (!) - see discussion in #4111 + if time.Since(reconn.lastRedial) > 10*time.Second { + reconn.lastRedial = time.Now() + conn2, err2 := reconn.dial() + if err2 != nil { + // logger socket still offline; instead of discarding the log, dump it to stderr + os.Stderr.Write(b) + return + } + if n, err = conn2.Write(b); err == nil { + reconn.Conn.Close() + reconn.Conn = conn2 + } + } else { + // last redial attempt was too recent; just dump to stderr for now + os.Stderr.Write(b) } + return } func (reconn *redialerConn) dial() (net.Conn, error) { - return net.Dial(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0)) + return net.DialTimeout(reconn.nw.addr.Network, reconn.nw.addr.JoinHostPort(0), reconn.timeout) } // Interface guards