Skip to content

Commit

Permalink
feat(gw): improved If-None-Match support (#8891)
Browse files Browse the repository at this point in the history
Improves the way we handle If-None-Match header:
- Support for more than one Etag passed in If-None-Match
- Match both strong and weak Etags to maximize caching across
  various HTTP clients and libraries (some send weak Etags by default)
- Support for wildcard '*'
- Tests for If-None-Match behavior
  • Loading branch information
lidel committed Apr 19, 2022
1 parent d59730f commit 67fdb6e
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 14 deletions.
83 changes: 78 additions & 5 deletions core/corehttp/gateway_handler.go
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"mime"
"net/http"
"net/textproto"
"net/url"
"os"
gopath "path"
Expand Down Expand Up @@ -390,11 +391,18 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat))
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String()))

// Finish early if client already has matching Etag
ifNoneMatch := r.Header.Get("If-None-Match")
if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == getDirListingEtag(resolvedPath.Cid()) {
w.WriteHeader(http.StatusNotModified)
return
// Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified
if inm := r.Header.Get("If-None-Match"); inm != "" {
pathCid := resolvedPath.Cid()
// need to check against both File and Dir Etag variants
// because this inexpensive check happens before we do any I/O
cidEtag := getEtag(r, pathCid)
dirEtag := getDirListingEtag(pathCid)
if etagMatch(inm, cidEtag, dirEtag) {
// Finish early if client already has a matching Etag
w.WriteHeader(http.StatusNotModified)
return
}
}

if err := i.handleGettingFirstBlock(r, begin, contentPath, resolvedPath); err != nil {
Expand Down Expand Up @@ -790,6 +798,71 @@ func getFilename(contentPath ipath.Path) string {
return gopath.Base(s)
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Matc stringh
// including the wildcard one.
func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool {
buf := ifNoneMatchHeader
for {
buf = textproto.TrimString(buf)
if len(buf) == 0 {
break
}
if buf[0] == ',' {
buf = buf[1:]
continue
}
// If-None-Match: * should match against any etag
if buf[0] == '*' {
return true
}
etag, remain := scanETag(buf)
if etag == "" {
break
}
// Check for match both strong and weak etags
if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) {
return true
}
buf = remain
}
return false
}

// scanETag determines if a syntactically valid ETag is present at s. If so,
// the ETag and remaining text after consuming ETag is returned. Otherwise,
// it returns "", "".
// (This is the same logic as one executed inside of http.ServeContent)
func scanETag(s string) (etag string, remain string) {
s = textproto.TrimString(s)
start := 0
if strings.HasPrefix(s, "W/") {
start = 2
}
if len(s[start:]) < 2 || s[start] != '"' {
return "", ""
}
// ETag is either W/"text" or "text".
// See RFC 7232 2.3.
for i := start + 1; i < len(s); i++ {
c := s[i]
switch {
// Character values allowed in ETags.
case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80:
case c == '"':
return s[:i+1], s[i+1:]
default:
return "", ""
}
}
return "", ""
}

// etagWeakMatch reports whether a and b match using weak ETag comparison.
func etagWeakMatch(a, b string) bool {
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
}

// generate Etag value based on HTTP request and CID
func getEtag(r *http.Request, cid cid.Cid) string {
prefix := `"`
Expand Down
12 changes: 3 additions & 9 deletions core/corehttp/gateway_handler_unixfs_dir.go
Expand Up @@ -93,15 +93,9 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// type instead of relying on autodetection (which may fail).
w.Header().Set("Content-Type", "text/html")

// Generated dir index requires custom Etag (it may change between go-ipfs versions)
if assets.AssetHash != "" {
dirEtag := getDirListingEtag(resolvedPath.Cid())
w.Header().Set("Etag", dirEtag)
if r.Header.Get("If-None-Match") == dirEtag {
w.WriteHeader(http.StatusNotModified)
return
}
}
// Generated dir index requires custom Etag (output may change between go-ipfs versions)
dirEtag := getDirListingEtag(resolvedPath.Cid())
w.Header().Set("Etag", dirEtag)

if r.Method == http.MethodHead {
logger.Debug("return as request's HTTP method is HEAD")
Expand Down
25 changes: 25 additions & 0 deletions core/corehttp/gateway_test.go
Expand Up @@ -656,3 +656,28 @@ func TestVersion(t *testing.T) {
t.Fatalf("response doesn't contain protocol version:\n%s", s)
}
}

func TestEtagMatch(t *testing.T) {
for _, test := range []struct {
header string // value in If-None-Match HTTP header
cidEtag string
dirEtag string
expected bool // expected result of etagMatch(header, cidEtag, dirEtag)
}{
{"", `"etag"`, "", false}, // no If-None-Match
{"", "", `"etag"`, false}, // no If-None-Match
{`"etag"`, `"etag"`, "", true}, // file etag match
{`W/"etag"`, `"etag"`, "", true}, // file etag match
{`"foo", W/"bar", W/"etag"`, `"etag"`, "", true}, // file etag match (array)
{`"foo",W/"bar",W/"etag"`, `"etag"`, "", true}, // file etag match (compact array)
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
{`W/"etag"`, "", `W/"etag"`, true}, // dir etag match
{`*`, `"etag"`, "", true}, // wildcard etag match
} {
result := etagMatch(test.header, test.cidEtag, test.dirEtag)
if result != test.expected {
t.Fatalf("unexpected result of etagMatch(%q, %q, %q), got %t, expected %t", test.header, test.cidEtag, test.dirEtag, result, test.expected)
}
}
}
49 changes: 49 additions & 0 deletions test/sharness/t0116-gateway-cache.sh
Expand Up @@ -145,6 +145,55 @@ test_expect_success "Prepare IPNS unixfs content path for testing" '
grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output
'

# If-None-Match (return 304 Not Modified when client sends matching Etag they already have)

test_expect_success "GET for /ipfs/ file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ dir with index.html file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$ROOT4_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with matching third Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"fakeEtag1\", \"fakeEtag2\", \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: W/\"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with wildcard Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: *" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipns/ file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: W/\"$ROOT3_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

# DirIndex etag is based on xxhash(./assets/dir-index-html), so we need to fetch it dynamically
test_expect_success "GET for /ipfs/ dir listing with matching strong Etag in If-None-Match returns 304 Not Modified" '
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
curl -svX GET -H "If-None-Match: \"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'
test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
curl -svX GET -H "If-None-Match: W/\"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_kill_ipfs_daemon

test_done

0 comments on commit 67fdb6e

Please sign in to comment.