Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: gateway support for tar.gz #9034

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 17 additions & 1 deletion core/corehttp/gateway_handler.go
Expand Up @@ -430,6 +430,14 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
carVersion := formatParams["version"]
i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin)
return
case "application/x-tar":
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger, false)
return
case "application/x-tar+gzip":
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger, true)
return
default: // catch-all for unsupported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -873,6 +881,10 @@ func getEtag(r *http.Request, cid cid.Cid) string {
f := responseFormat[strings.LastIndex(responseFormat, ".")+1:]
// Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + f + suffix
// Since different TAR implementations may produce different byte-for-byte responses, we define a weak Etag.
if strings.HasPrefix(responseFormat, "application/x-tar") {
prefix = "W/" + prefix
}
}
// TODO: include selector suffix when https://github.com/ipfs/go-ipfs/issues/8769 lands
return prefix + cid.String() + suffix
Expand All @@ -887,11 +899,15 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.raw", nil, nil
case "car":
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
case "tar.gz":
return "application/x-tar+gzip", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
// Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
// We only care about explciit, vendor-specific content-types.
// We only care about explicit, vendor-specific content-types.
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") {
Expand Down
94 changes: 94 additions & 0 deletions core/corehttp/gateway_handler_tar.go
@@ -0,0 +1,94 @@
package corehttp

import (
"compress/gzip"
"context"
"html"
"io"
"net/http"
"time"

files "github.com/ipfs/go-ipfs-files"
"github.com/ipfs/go-ipfs/tracing"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

// unixEpochTime is the Unix epoch (1970-01-01T00:00:00Z). serveTAR compares
// the modification time against it and, like a zero time.Time, treats a match
// as "no modification time known", omitting the Last-Modified header.
var unixEpochTime = time.Unix(0, 0)

// serveTAR streams the UnixFS node at resolvedPath to the client as a TAR
// archive, wrapped in gzip when compressed is true.
//
// The response is produced in this order:
//  1. cache-control headers are set and a matching If-None-Match short-circuits
//     with 304 Not Modified;
//  2. Content-Disposition is set from the "filename" query parameter, falling
//     back to "<cid>.tar" (or "<cid>.tar.gz");
//  3. the node is fetched via the Unixfs API (404 on failure);
//  4. Last-Modified and Content-Type are set and the archive is streamed.
//
// The archive's root entry is always named after the root CID. The download
// filename is deliberately NOT reused for it: that value may come straight
// from the request's query string, and would otherwise let a caller choose
// arbitrary entry paths inside the archive (and would name the extracted
// content "something.tar.gz").
//
// begin and logger are accepted for signature parity with the caller but are
// not used by this function.
func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger, compressed bool) {
	ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", trace.WithAttributes(attribute.String("path", resolvedPath.String())))
	defer span.End()

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	rootCid := resolvedPath.Cid()

	// Set Cache-Control and read optional Last-Modified time.
	modtime := addCacheControlHeaders(w, r, contentPath, rootCid)

	// Finish early if Etag match.
	if r.Header.Get("If-None-Match") == getEtag(r, rootCid) {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	// Set Content-Disposition. The name only affects what the client saves the
	// download as; it is not used inside the archive.
	name := r.URL.Query().Get("filename")
	if name == "" {
		name = rootCid.String() + ".tar"
		if compressed {
			name += ".gz"
		}
	}
	setContentDispositionHeader(w, name, "attachment")

	// Get Unixfs file.
	file, err := i.api.Unixfs().Get(ctx, resolvedPath)
	if err != nil {
		webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusNotFound)
		return
	}
	defer file.Close()

	// Define the output writer, optionally layering gzip on top of the response.
	// Deferred calls run last-in-first-out, so the TAR writer (deferred below)
	// is closed before the gzip writer and its footer ends up compressed too.
	var dstw io.Writer = w
	if compressed {
		gzipw := gzip.NewWriter(w)
		defer gzipw.Close()
		dstw = gzipw
	}

	// Construct the TAR writer.
	tarw, err := files.NewTarWriter(dstw)
	if err != nil {
		webError(w, "could not build tar writer", err, http.StatusInternalServerError)
		return
	}
	defer tarw.Close()

	// Sets correct Last-Modified header. This code is borrowed from the standard
	// library (net/http/server.go) as we cannot use serveFile without throwing the entire
	// TAR into the memory first.
	if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
		w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
	}

	// The error is ignored here; NOTE(review): this presumably relies on the
	// caller having already resolved the same response format for this request.
	responseFormat, _, _ := customResponseFormat(r)
	w.Header().Set("Content-Type", responseFormat)

	// The TAR has a single top-level entry named by the root CID.
	if err := tarw.WriteFile(file, rootCid.String()); err != nil {
		// Report the failure both as a regular header (effective only if nothing
		// has been written to the response yet) and as a trailer (effective once
		// streaming has started; net/http requires the TrailerPrefix key for
		// trailers that were not declared up front).
		// Trailers are not something browsers can access
		// (https://github.com/mdn/browser-compat-data/issues/14703), so clients
		// should always verify that the received TAR stream is complete.
		w.Header().Set("X-Stream-Error", err.Error())
		w.Header().Set(http.TrailerPrefix+"X-Stream-Error", err.Error())
		return
	}
}
10 changes: 10 additions & 0 deletions test/sharness/t0110-gateway.sh
Expand Up @@ -271,6 +271,16 @@ test_expect_success "GET compact blocks succeeds" '
test_cmp expected actual
'

# Requests $FOO2_HASH from the local gateway with ?format=tar and pipes the
# response body straight into `tar -x`. Unless the harness enables pipefail,
# the result is the exit status of `tar`, i.e. the test checks that the body
# is an extractable TAR archive.
test_expect_success "GET TAR file from gateway and extract" '
curl "http://127.0.0.1:$port/ipfs/$FOO2_HASH?format=tar" | tar -x
'

# Captures curl's verbose output (stdout and stderr, which includes the
# response headers prefixed with "< ") into curl_output_filename, prints it
# for debugging, and greps for the TAR Content-Type header.
# The grep pattern is not anchored at the end, so it matches any value that
# starts with "application/x-tar".
test_expect_success "GET TAR file has expected Content-Type" '
curl -svX GET "http://127.0.0.1:$port/ipfs/$FOO2_HASH?format=tar" > curl_output_filename 2>&1 &&
cat curl_output_filename &&
grep "< Content-Type: application/x-tar" curl_output_filename
'

test_kill_ipfs_daemon


Expand Down