Skip to content

Commit

Permalink
feat(gateway): TAR response format (#9029)
Browse files Browse the repository at this point in the history
Implementation of IPIP-288 (ipfs/specs#288)

Co-authored-by: Marcin Rataj <lidel@lidel.org>

This commit was moved from ipfs/kubo@a210abd
  • Loading branch information
hacdias committed Nov 9, 2022
1 parent e010084 commit 772fab3
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 5 deletions.
18 changes: 13 additions & 5 deletions gateway/core/corehttp/gateway_handler.go
Expand Up @@ -430,6 +430,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
carVersion := formatParams["version"]
i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin)
return
case "application/x-tar":
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
default: // catch-all for unsuported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -842,9 +846,10 @@ func getEtag(r *http.Request, cid cid.Cid) string {
responseFormat, _, err := customResponseFormat(r)
if err == nil && responseFormat != "" {
// application/vnd.ipld.foo → foo
f := responseFormat[strings.LastIndex(responseFormat, ".")+1:]
// Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + f + suffix
// application/x-bar → x-bar
shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:]
// Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + shortFormat + suffix
}
// TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands
return prefix + cid.String() + suffix
Expand All @@ -859,14 +864,17 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.raw", nil, nil
case "car":
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
// Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
// We only care about explciit, vendor-specific content-types.
// We only care about explicit, vendor-specific content-types.
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") {
if strings.HasPrefix(accept, "application/vnd.ipld") ||
strings.HasPrefix(accept, "application/x-tar") {
mediatype, params, err := mime.ParseMediaType(accept)
if err != nil {
return "", nil, err
Expand Down
92 changes: 92 additions & 0 deletions gateway/core/corehttp/gateway_handler_tar.go
@@ -0,0 +1,92 @@
package corehttp

import (
"context"
"html"
"net/http"
"time"

files "github.com/ipfs/go-ipfs-files"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/tracing"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

// unixEpochTime is the Unix epoch (zero seconds, zero nanoseconds).
// serveTAR compares the content modification time against it and skips the
// Last-Modified header when the two are equal.
var unixEpochTime = time.Unix(0, 0)

// serveTAR streams the UnixFS node behind resolvedPath to the client as an
// application/x-tar attachment whose single top-level entry is named after
// the root CID.
//
// Headers are populated in stages, so each early exit carries only what was
// set up to that point:
//   - UnixFS lookup failure: webError with 400, no TAR-specific headers.
//   - If-None-Match equal to the computed weak Etag: 304 with cache headers
//     and Etag only.
//   - TAR writer construction failure: webError with 500.
//
// A failure while streaming cannot change the status code anymore; the error
// text is appended to the response body instead (see the comment at the end).
//
// begin and logger are part of the signature but are not used in this body.
func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) {
	pathAttr := attribute.String("path", resolvedPath.String())
	ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", trace.WithAttributes(pathAttr))
	defer span.End()

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Fetch the UnixFS node that becomes the TAR payload.
	node, err := i.api.Unixfs().Get(ctx, resolvedPath)
	if err != nil {
		errMsg := "ipfs cat " + html.EscapeString(contentPath.String())
		webError(w, errMsg, err, http.StatusBadRequest)
		return
	}
	defer node.Close()

	root := resolvedPath.Cid()

	// Emits Cache-Control and hands back the optional Last-Modified time.
	lastModified := addCacheControlHeaders(w, r, contentPath, root)

	// The Etag is weak (W/) because byte-for-byte identical responses cannot
	// be guaranteed, yet HTTP caching is still desirable. Two TAR responses
	// for the same CID are logically equivalent, but when the TAR is
	// streamed, files and directories may in theory arrive in a different
	// order (depends on TAR lib and filesystem/inodes).
	weakEtag := `W/` + getEtag(r, root)
	w.Header().Set("Etag", weakEtag)

	// Nothing more to send when the client already holds this version.
	if clientEtag := r.Header.Get("If-None-Match"); clientEtag == weakEtag {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	// Content-Disposition: an explicit ?filename= wins, otherwise fall back
	// to "<cid>.tar".
	downloadName := r.URL.Query().Get("filename")
	if downloadName == "" {
		downloadName = root.String() + ".tar"
	}
	setContentDispositionHeader(w, downloadName, "attachment")

	// Build the TAR writer on top of the response.
	tarw, err := files.NewTarWriter(w)
	if err != nil {
		webError(w, "could not build tar writer", err, http.StatusInternalServerError)
		return
	}
	defer tarw.Close()

	w.Header().Set("Content-Type", "application/x-tar")
	w.Header().Set("X-Content-Type-Options", "nosniff") // keep browsers from sniffing the payload

	// Last-Modified is set by hand, mirroring the standard library
	// (net/http/server.go): serveFile cannot be used without buffering the
	// entire TAR in memory first.
	if !lastModified.IsZero() && !lastModified.Equal(unixEpochTime) {
		w.Header().Set("Last-Modified", lastModified.UTC().Format(http.TimeFormat))
	}

	// The TAR has a top-level directory (or file) named by the CID.
	err = tarw.WriteFile(node, root.String())
	if err == nil {
		return
	}

	streamErr := err.Error()
	w.Header().Set("X-Stream-Error", streamErr)
	// Trailer headers do not work in web browsers
	// (see https://github.com/mdn/browser-compat-data/issues/14703)
	// and options for error handling in browser contexts are limited.
	// To improve UX/DX, the response stream is finished with the error
	// message, which lets the client
	// (1) detect the error by ending up with a corrupted TAR
	// (2) reason about what went wrong by inspecting the tail of the TAR stream
	_, _ = w.Write([]byte(streamErr))
}

0 comments on commit 772fab3

Please sign in to comment.