From a210abd74364076404c18df1acbeed8bd6a5d6b7 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 9 Nov 2022 19:20:33 +0100 Subject: [PATCH] feat(gateway): TAR response format (#9029) Implementation of IPIP-288 (https://github.com/ipfs/specs/pull/288) Co-authored-by: Marcin Rataj --- core/corehttp/gateway_handler.go | 18 +++- core/corehttp/gateway_handler_tar.go | 92 ++++++++++++++++++ docs/changelogs/v0.17.md | 21 +++- docs/examples/kubo-as-a-library/go.mod | 2 +- docs/examples/kubo-as-a-library/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- .../t0122-gateway-tar-data/inside-root.car | Bin 0 -> 383 bytes .../t0122-gateway-tar-data/outside-root.car | Bin 0 -> 249 bytes test/sharness/t0122-gateway-tar.sh | 91 +++++++++++++++++ 10 files changed, 222 insertions(+), 12 deletions(-) create mode 100644 core/corehttp/gateway_handler_tar.go create mode 100644 test/sharness/t0122-gateway-tar-data/inside-root.car create mode 100644 test/sharness/t0122-gateway-tar-data/outside-root.car create mode 100755 test/sharness/t0122-gateway-tar.sh diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index a96799f5875..7f0f11885e6 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -430,6 +430,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request carVersion := formatParams["version"] i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin) return + case "application/x-tar": + logger.Debugw("serving tar file", "path", contentPath) + i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger) + return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) webError(w, "failed respond with requested content type", err, http.StatusBadRequest) @@ -842,9 +846,10 @@ func getEtag(r *http.Request, cid cid.Cid) string { responseFormat, _, err := customResponseFormat(r) if err == nil && responseFormat != "" { // application/vnd.ipld.foo → foo - f := responseFormat[strings.LastIndex(responseFormat, ".")+1:] - // Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses) - suffix = `.` + f + suffix + // application/x-bar → x-bar + shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:] + // Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses) + suffix = `.` + shortFormat + suffix } // TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands return prefix + cid.String() + suffix @@ -859,14 +864,17 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "application/vnd.ipld.raw", nil, nil case "car": return "application/vnd.ipld.car", nil, nil + case "tar": + return "application/x-tar", nil, nil } } // Browsers and other user agents will send Accept header with generic types like: // Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8 - // We only care about explciit, vendor-specific content-types. + // We only care about explicit, vendor-specific content-types. for _, accept := range r.Header.Values("Accept") { // respond to the very first ipld content type - if strings.HasPrefix(accept, "application/vnd.ipld") { + if strings.HasPrefix(accept, "application/vnd.ipld") || + strings.HasPrefix(accept, "application/x-tar") { mediatype, params, err := mime.ParseMediaType(accept) if err != nil { return "", nil, err diff --git a/core/corehttp/gateway_handler_tar.go b/core/corehttp/gateway_handler_tar.go new file mode 100644 index 00000000000..532d8875760 --- /dev/null +++ b/core/corehttp/gateway_handler_tar.go @@ -0,0 +1,92 @@ +package corehttp + +import ( + "context" + "html" + "net/http" + "time" + + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "github.com/ipfs/kubo/tracing" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" +) + +var unixEpochTime = time.Unix(0, 0) + +func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // Get Unixfs file + file, err := i.api.Unixfs().Get(ctx, resolvedPath) + if err != nil { + webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusBadRequest) + return + } + defer file.Close() + + rootCid := resolvedPath.Cid() + + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, rootCid) + + // Weak Etag W/ because we can't guarantee byte-for-byte identical + // responses, but still want to benefit from HTTP Caching. Two TAR + // responses for the same CID will be logically equivalent, + // but when TAR is streamed, then in theory, files and directories + // may arrive in different order (depends on TAR lib and filesystem/inodes). + etag := `W/` + getEtag(r, rootCid) + w.Header().Set("Etag", etag) + + // Finish early if Etag match + if r.Header.Get("If-None-Match") == etag { + w.WriteHeader(http.StatusNotModified) + return + } + + // Set Content-Disposition + var name string + if urlFilename := r.URL.Query().Get("filename"); urlFilename != "" { + name = urlFilename + } else { + name = rootCid.String() + ".tar" + } + setContentDispositionHeader(w, name, "attachment") + + // Construct the TAR writer + tarw, err := files.NewTarWriter(w) + if err != nil { + webError(w, "could not build tar writer", err, http.StatusInternalServerError) + return + } + defer tarw.Close() + + // Sets correct Last-Modified header. This code is borrowed from the standard + // library (net/http/server.go) as we cannot use serveFile without throwing the entire + // TAR into the memory first. + if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) { + w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat)) + } + + w.Header().Set("Content-Type", "application/x-tar") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // The TAR has a top-level directory (or file) named by the CID. + if err := tarw.WriteFile(file, rootCid.String()); err != nil { + w.Header().Set("X-Stream-Error", err.Error()) + // Trailer headers do not work in web browsers + // (see https://github.com/mdn/browser-compat-data/issues/14703) + // and we have limited options around error handling in browser contexts. + // To improve UX/DX, we finish response stream with error message, allowing client to + // (1) detect error by having corrupted TAR + // (2) be able to reason what went wrong by instecting the tail of TAR stream + _, _ = w.Write([]byte(err.Error())) + return + } +} diff --git a/docs/changelogs/v0.17.md b/docs/changelogs/v0.17.md index 7fec0b24312..efe0099395b 100644 --- a/docs/changelogs/v0.17.md +++ b/docs/changelogs/v0.17.md @@ -9,15 +9,34 @@ Below is an outline of all that is in this release, so you get a sense of all th - [Kubo changelog v0.17](#kubo-changelog-v017) - [v0.17.0](#v0170) - [Overview](#overview) + - [TOC](#toc) - [🔦 Highlights](#-highlights) + - [TAR Response Format on Gateways](#tar-response-format-on-gateways) - [Changelog](#changelog) - [Contributors](#contributors) - ### 🔦 Highlights +#### TAR Response Format on Gateways + +Implemented [IPIP-288](https://github.com/ipfs/specs/pull/288) which adds +support for requesting deserialized UnixFS directory as a TAR stream. + +HTTP clients can request TAR response by passing the `?format=tar` URL +parameter, or setting `Accept: application/x-tar` HTTP header: + +```console +$ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq +$ curl -H "Accept: application/x-tar" "http://127.0.0.1:8080/ipfs/$DIR_CID" > dir.tar +$ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=tar" | tar xv +bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq +bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - alt.txt +bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - transcript.txt +bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1.png +``` + ### Changelog diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index 950800dd0fe..de0527efd8d 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -7,7 +7,7 @@ go 1.17 replace github.com/ipfs/kubo => ./../../.. require ( - github.com/ipfs/go-ipfs-files v0.1.1 + github.com/ipfs/go-ipfs-files v0.2.0 github.com/ipfs/interface-go-ipfs-core v0.7.0 github.com/ipfs/kubo v0.14.0-rc1 github.com/libp2p/go-libp2p v0.23.2 diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index 76400b4106c..5345acee3b8 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -559,8 +559,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s= github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4= github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs= -github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324= -github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM= +github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8= +github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M= github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU= github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo= github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY= diff --git a/go.mod b/go.mod index 1ccd10ae0e1..43688f8bfe0 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,7 @@ require ( github.com/ipfs/go-ipfs-cmds v0.8.1 github.com/ipfs/go-ipfs-exchange-interface v0.2.0 github.com/ipfs/go-ipfs-exchange-offline v0.3.0 - github.com/ipfs/go-ipfs-files v0.1.1 + github.com/ipfs/go-ipfs-files v0.2.0 github.com/ipfs/go-ipfs-keystore v0.0.2 github.com/ipfs/go-ipfs-pinner v0.2.1 github.com/ipfs/go-ipfs-posinfo v0.0.1 diff --git a/go.sum b/go.sum index f1917d039d0..348263ffefe 100644 --- a/go.sum +++ b/go.sum @@ -552,8 +552,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s= github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4= github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs= -github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324= -github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM= +github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8= +github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M= github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU= github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo= github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY= diff --git a/test/sharness/t0122-gateway-tar-data/inside-root.car b/test/sharness/t0122-gateway-tar-data/inside-root.car new file mode 100644 index 0000000000000000000000000000000000000000..c37b594f8d6d132f3d3ef955b7b2270302f3b574 GIT binary patch literal 383 zcmcColvee^xFHkc@K`SV?H{4`bU*@S0~udT5xce(!=Q& z`4-$uFH0>d&dkqaj37p}kRF!`NF7V9N~fsTRp-u?;^j5_^n32ysn~q^+RC22)y!qz zgARrYv8CnbCnXkfF>x?P6Q#irV(`1uv2n9Mx30H1R<)FoX-6hgLt#_gqO@SXiDDIH zM?cOJ;!MdbN=+`wFRFx_O;8WW>`)|0=|$r>CEmpD)2Dq)Vg@=A^_T2|g|+4n`wlZ170U$;sDID9 rootDir/ą/ę/file-źł.txt && + echo "I am a txt file in confusing /api dir" > rootDir/api/file.txt && + echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt && + echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt && + DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) && + FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) && + FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size) + echo "$FILE_CID / $FILE_SIZE" +' + +test_expect_success "GET TAR with format=tar and extract" ' + curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x +' + +test_expect_success "GET TAR with 'Accept: application/x-tar' and extract" ' + curl -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" | tar -x +' + +test_expect_success "GET TAR with format=tar has expected Content-Type" ' + curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" > curl_output_filename 2>&1 && + test_should_contain "Content-Disposition: attachment;" curl_output_filename && + test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename && + test_should_contain "Content-Type: application/x-tar" curl_output_filename +' + +test_expect_success "GET TAR with 'Accept: application/x-tar' has expected Content-Type" ' + curl -sD - -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output_filename 2>&1 && + test_should_contain "Content-Disposition: attachment;" curl_output_filename && + test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename && + test_should_contain "Content-Type: application/x-tar" curl_output_filename +' + +test_expect_success "GET TAR has expected root file" ' + rm -rf outputDir && mkdir outputDir && + curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x -C outputDir && + test -f "outputDir/$FILE_CID" && + echo "I am a txt file on path with utf8" > expected && + test_cmp expected outputDir/$FILE_CID +' + +test_expect_success "GET TAR has expected root directory" ' + rm -rf outputDir && mkdir outputDir && + curl "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=tar" | tar -x -C outputDir && + test -d "outputDir/$DIR_CID" && + echo "I am a txt file on path with utf8" > expected && + test_cmp expected outputDir/$DIR_CID/ą/ę/file-źł.txt +' + +test_expect_success "GET TAR with explicit ?filename= succeeds with modified Content-Disposition header" " + curl -fo actual -D actual_headers 'http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?filename=testтест.tar&format=tar' && + grep -F 'Content-Disposition: attachment; filename=\"test____.tar\"; filename*=UTF-8'\'\''test%D1%82%D0%B5%D1%81%D1%82.tar' actual_headers +" + +test_expect_success "Add CARs with relative paths to test with" ' + ipfs dag import ../t0122-gateway-tar-data/outside-root.car > import_output && + test_should_contain $OUTSIDE_ROOT_CID import_output && + ipfs dag import ../t0122-gateway-tar-data/inside-root.car > import_output && + test_should_contain $INSIDE_ROOT_CID import_output +' + +test_expect_success "GET TAR with relative paths outside root fails" ' + curl -o - "http://127.0.0.1:$GWAY_PORT/ipfs/$OUTSIDE_ROOT_CID?format=tar" > curl_output_filename && + test_should_contain "relative UnixFS paths outside the root are now allowed" curl_output_filename +' + +test_expect_success "GET TAR with relative paths inside root works" ' + rm -rf outputDir && mkdir outputDir && + curl "http://127.0.0.1:$GWAY_PORT/ipfs/$INSIDE_ROOT_CID?format=tar" | tar -x -C outputDir && + test -f outputDir/$INSIDE_ROOT_CID/foobar/file +' + +test_kill_ipfs_daemon + +test_done