Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): Block and CAR response formats #8758

Merged
merged 17 commits into from Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
259 changes: 149 additions & 110 deletions core/corehttp/gateway_handler.go
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"html/template"
"io"
"mime"
"net/http"
"net/url"
"os"
Expand All @@ -17,8 +16,7 @@ import (
"time"

humanize "github.com/dustin/go-humanize"
"github.com/gabriel-vasile/mimetype"
"github.com/ipfs/go-cid"
cid "github.com/ipfs/go-cid"
files "github.com/ipfs/go-ipfs-files"
assets "github.com/ipfs/go-ipfs/assets"
dag "github.com/ipfs/go-merkledag"
Expand All @@ -32,11 +30,13 @@ import (
)

const (
ipfsPathPrefix = "/ipfs/"
ipnsPathPrefix = "/ipns/"
ipfsPathPrefix = "/ipfs/"
ipnsPathPrefix = "/ipns/"
immutableCacheControl = "public, max-age=29030400, immutable"
)

var onlyAscii = regexp.MustCompile("[[:^ascii:]]")
var noModtime = time.Unix(0, 0) // disables Last-Modified header if passed as modtime

// HTML-based redirect for errors which can be recovered from, but where we
// want to provide a hint to people that they should fix things on their end.
Expand Down Expand Up @@ -89,6 +89,7 @@ func (sw *statusResponseWriter) WriteHeader(code int) {

func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler {
unixfsGetMetric := prometheus.NewSummaryVec(
// TODO: deprecate and switch to content type agnostic metrics: https://github.com/ipfs/go-ipfs/issues/8441
prometheus.SummaryOpts{
Namespace: "ipfs",
Subsystem: "http",
Expand Down Expand Up @@ -296,36 +297,26 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
return
}

dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath)
if err != nil {
webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound)
// Finish early if client already has matching Etag
// (suffix match to cover both direct CID and DirIndex cases)
cidEtagSuffix := resolvedPath.Cid().String() + `"`
if strings.HasSuffix(r.Header.Get("If-None-Match"), cidEtagSuffix) {
lidel marked this conversation as resolved.
Show resolved Hide resolved
w.WriteHeader(http.StatusNotModified)
return
}

i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds())

defer dr.Close()

var responseEtag string

// we need to figure out whether this is a directory before doing most of the heavy lifting below
_, ok := dr.(files.Directory)

if ok && assets.BindataVersionHash != "" {
responseEtag = `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"`
} else {
responseEtag = `"` + resolvedPath.Cid().String() + `"`
}

// Check etag sent back to us
if r.Header.Get("If-None-Match") == responseEtag || r.Header.Get("If-None-Match") == `W/`+responseEtag {
w.WriteHeader(http.StatusNotModified)
// Update the global metric of the time it takes to read the final root block of the requested resource
// NOTE: for legacy reasons this happens before we go into content-type specific code paths
_, err = i.api.Block().Get(r.Context(), resolvedPath)
if err != nil {
webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError)
return
}
i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds())

// HTTP Headers
i.addUserHeaders(w) // ok, _now_ write user's headers.
w.Header().Set("X-IPFS-Path", urlPath)
w.Header().Set("Etag", responseEtag)
w.Header().Set("X-Ipfs-Path", urlPath)

if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil {
w.Header().Set("X-Ipfs-Roots", rootCids)
Expand All @@ -334,45 +325,55 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
return
}

// set these headers _after_ the error, for we may just not have it
// and don't want the client to cache a 500 response...
// and only if it's /ipfs!
// TODO: break this out when we split /ipfs /ipns routes.
modtime := time.Now()

if f, ok := dr.(files.File); ok {
if strings.HasPrefix(urlPath, ipfsPathPrefix) {
w.Header().Set("Cache-Control", "public, max-age=29030400, immutable")

// set modtime to a really long time ago, since files are immutable and should stay cached
modtime = time.Unix(1, 0)
// Support custom response formats passed via ?format or Accept HTTP header
if contentType := getExplicitContentType(r); contentType != "" {
switch contentType {
lidel marked this conversation as resolved.
Show resolved Hide resolved
case "application/vnd.ipld.raw":
logger.Debugw("serving raw block", "path", parsedPath)
i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath)
return
case "application/vnd.ipld.car":
logger.Debugw("serving car stream", "path", parsedPath)
i.serveCar(w, r, resolvedPath.Cid(), parsedPath)
return
case "application/vnd.ipld.car; version=1":
logger.Debugw("serving car stream", "path", parsedPath)
i.serveCar(w, r, resolvedPath.Cid(), parsedPath)
return
case "application/vnd.ipld.car; version=2": // no CARv2 in go-ipfs atm
lidel marked this conversation as resolved.
Show resolved Hide resolved
err := fmt.Errorf("unsupported CARv2 format, try again with CARv1")
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
return
default:
err := fmt.Errorf("unsupported format %q", contentType)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
return
}
}

urlFilename := r.URL.Query().Get("filename")
var name string
if urlFilename != "" {
disposition := "inline"
if r.URL.Query().Get("download") == "true" {
disposition = "attachment"
}
utf8Name := url.PathEscape(urlFilename)
asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_"))
w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name))
name = urlFilename
} else {
name = getFilename(urlPath)
}
// Handling Unixfs
dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath)
if err != nil {
webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound)
return
}
defer dr.Close()

logger.Debugw("serving file", "name", name)
i.serveFile(w, r, name, modtime, f)
// Handling Unixfs file
if f, ok := dr.(files.File); ok {
logger.Debugw("serving file", "path", parsedPath)
i.serveFile(w, r, parsedPath, resolvedPath.Cid(), f)
return
}

// Handling Unixfs directory
dir, ok := dr.(files.Directory)
if !ok {
internalWebError(w, fmt.Errorf("unsupported file type"))
return
}

// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(resolvedPath, "index.html")
idx, err := i.api.Unixfs().Get(r.Context(), idxPath)
switch err.(type) {
Expand All @@ -398,13 +399,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
internalWebError(w, files.ErrNotReader)
return
}
// static index.html → no need to generate dynamic dir-index-html
// replace mutable DirIndex Etag with immutable dir CID
w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`)

logger.Debugw("serving index.html file", "path", idxPath)
// write to request
i.serveFile(w, r, "index.html", modtime, f)
i.serveFile(w, r, idxPath, resolvedPath.Cid(), f)
return
case resolver.ErrNoLink:
logger.Debugw("no index.html; noop", "path", idxPath)
Expand All @@ -426,6 +424,17 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
// A HTML directory index will be presented, be sure to set the correct
// type instead of relying on autodetection (which may fail).
w.Header().Set("Content-Type", "text/html")

// Generated dir index requires custom Etag (it may change between go-ipfs versions)
if assets.BindataVersionHash != "" {
dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"`
w.Header().Set("Etag", dirEtag)
if r.Header.Get("If-None-Match") == dirEtag {
w.WriteHeader(http.StatusNotModified)
return
}
}

if r.Method == http.MethodHead {
logger.Debug("return as request's HTTP method is HEAD")
return
Expand Down Expand Up @@ -527,55 +536,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
}
}

// serveFile writes a single file to the response via http.ServeContent.
//
// The Content-Type is chosen as follows:
//   - symlinks are always reported as "inode/symlink";
//   - otherwise the extension of name is looked up with mime.TypeByExtension;
//   - if that yields nothing, the type is sniffed from the content itself and
//     the reader is rewound to the start afterwards.
//
// Files whose size cannot be determined are rejected with 502 Bad Gateway;
// sniffing or rewinding failures produce 500 Internal Server Error.
func (i *gatewayHandler) serveFile(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, file files.File) {
	fileSize, sizeErr := file.Size()
	if sizeErr != nil {
		http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway)
		return
	}

	body := &lazySeeker{
		size:   fileSize,
		reader: file,
	}

	// We should be smarter about resolving symlinks but this is the
	// "most correct" we can be without doing that.
	contentType := "inode/symlink"
	if _, symlink := file.(*files.Symlink); !symlink {
		contentType = mime.TypeByExtension(gopath.Ext(name))
		if contentType == "" {
			// Fall back to content sniffing with
			// https://github.com/gabriel-vasile/mimetype
			// Fixes https://github.com/ipfs/go-ipfs/issues/7252
			detected, detectErr := mimetype.DetectReader(body)
			if detectErr != nil {
				http.Error(w, fmt.Sprintf("cannot detect content-type: %s", detectErr.Error()), http.StatusInternalServerError)
				return
			}
			contentType = detected.String()

			// Sniffing consumed part of the stream; rewind before serving.
			if _, seekErr := body.Seek(0, io.SeekStart); seekErr != nil {
				http.Error(w, "seeker can't seek", http.StatusInternalServerError)
				return
			}
		}

		// Strip the encoding from the HTML Content-Type header and let the
		// browser figure it out.
		//
		// Fixes https://github.com/ipfs/go-ipfs/issues/2203
		if strings.HasPrefix(contentType, "text/html;") {
			contentType = "text/html"
		}
	}
	w.Header().Set("Content-Type", contentType)

	http.ServeContent(&statusResponseWriter{w}, req, name, modtime, body)
}

func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool {
resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath)
if err != nil {
Expand Down Expand Up @@ -795,6 +755,67 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) {
}
}

func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) {
// Set Etag to file's CID (override whatever was set before)
w.Header().Set("Etag", `"`+fileCid.String()+`"`)

// Set Cache-Control and Last-Modified based on contentPath properties
if contentPath.Mutable() {
// mutable namespaces such as /ipns/ can't be cached forever

/* For now we set Last-Modified to Now() to leverage caching heuristics built into modern browsers:
* https://github.com/ipfs/go-ipfs/pull/8074#pullrequestreview-645196768
* but we should not set it to fake values and use Cache-Control based on TTL instead */
modtime = time.Now()

// TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462
// TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920
lidel marked this conversation as resolved.
Show resolved Hide resolved

} else {
// immutable! CACHE ALL THE THINGS, FOREVER! wolololol
w.Header().Set("Cache-Control", immutableCacheControl)

// Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control)
modtime = noModtime

// TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920
}

return modtime
}

// addContentDispositionHeader sets Content-Disposition when the ?filename URL
// query parameter is present and returns the preferred filename: the value of
// ?filename if given, otherwise the name derived from contentPath.
//
// This enables:
//   - creation of HTML links that trigger "Save As.." dialog instead of being
//     rendered by the browser
//   - overriding the filename used when saving subresource assets on HTML page
//   - providing a default filename for HTTP clients when downloading direct
//     /ipfs/CID without any subpath
func addContentDispositionHeader(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) string {
	fallback := getFilename(contentPath)
	query := r.URL.Query()

	// ?filename=cat.jpg triggers Content-Disposition: [..] filename,
	// which impacts the default name used in "Save As.." dialog.
	override := query.Get("filename")
	if override == "" {
		return fallback
	}

	// ?download=true triggers Content-Disposition: [..] attachment,
	// which skips rendering and forces "Save As.." dialog in browsers.
	mode := "inline"
	if query.Get("download") == "true" {
		mode = "attachment"
	}

	setContentDispositionHeader(w, override, mode)
	return override
}

// setContentDispositionHeader sets Content-Disposition on w for the given
// disposition and filename. Two forms of the name are emitted: filename="..."
// with every non-ASCII character replaced by "_", and filename*=UTF-8''...
// with the full name; both are percent-encoded with url.PathEscape.
func setContentDispositionHeader(w http.ResponseWriter, filename string, disposition string) {
	asciiOnly := onlyAscii.ReplaceAllLiteralString(filename, "_")
	fallbackName := url.PathEscape(asciiOnly)
	fullName := url.PathEscape(filename)

	headerValue := fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, fallbackName, fullName)
	w.Header().Set("Content-Disposition", headerValue)
}

// Set X-Ipfs-Roots with logical CID array for efficient HTTP cache invalidation.
func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) {
/*
Expand Down Expand Up @@ -863,14 +884,32 @@ func internalWebError(w http.ResponseWriter, err error) {
webErrorWithCode(w, "internalWebError", err, http.StatusInternalServerError)
}

func getFilename(s string) string {
// getFilename returns the last element of contentPath, or "" when the path is
// a bare /ipfs/<root> or /ipns/<root> with no subpath.
func getFilename(contentPath ipath.Path) string {
	p := contentPath.String()
	namespaced := strings.HasPrefix(p, ipfsPathPrefix) || strings.HasPrefix(p, ipnsPathPrefix)
	if namespaced && strings.Count(gopath.Clean(p), "/") <= 2 {
		// Don't want to treat ipfs.io in /ipns/ipfs.io as a filename.
		return ""
	}
	return gopath.Base(p)
}

// getExplicitContentType returns the response format explicitly requested by
// the client, or "" if no explicit format was requested.
//
// The ?format= query parameter is consulted first: "raw" and "car" are
// translated to their content types, and any other value is ignored.
// Otherwise every Accept header value is split on commas and the first entry
// (with surrounding whitespace trimmed) that starts with "application/vnd."
// is returned as sent, including any parameters such as "; version=1".
func getExplicitContentType(r *http.Request) string {
	// translate query param to a content type
	switch r.URL.Query().Get("format") {
	case "raw":
		return "application/vnd.ipld.raw"
	case "car":
		return "application/vnd.ipld.car"
	}

	// Accept may be repeated and each value may list several media ranges
	// ("application/vnd.ipld.raw, */*;q=0.8"), so inspect every entry rather
	// than treating the first header value as a single content type.
	for _, header := range r.Header.Values("Accept") {
		for _, mediaRange := range strings.Split(header, ",") {
			candidate := strings.TrimSpace(mediaRange)
			if strings.HasPrefix(candidate, "application/vnd.") {
				return candidate
			}
		}
	}
	return ""
}

func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) {
filename404, ctype, err := preferred404Filename(r.Header.Values("Accept"))
if err != nil {
Expand Down