Skip to content

Commit

Permalink
feat(gateway): Block and CAR response formats (#8758)
Browse files Browse the repository at this point in the history
* feat: serveRawBlock implements ?format=block
* feat: serveCar implements ?format=car
* feat(gw): ?format= or Accept HTTP header

- extracted file-like content type responses to separate .go files
- Accept HTTP header with support for application/vnd.ipld.* types

* fix: use .bin for raw block content-disposition

.raw may be associated with a specific application on some operating systems,
while .bin seems to be universally treated as a generic "binary file" across all systems:
https://en.wikipedia.org/wiki/List_of_filename_extensions_(A%E2%80%93E)

* refactor: gateway_handler_unixfs.go

- Moved UnixFS response handling to gateway_handler_unixfs*.go files.
- Removed support for X-Ipfs-Gateway-Prefix (Closes #7702)

* refactor: prefix cleanup and readable paths

- removed dead code after X-Ipfs-Gateway-Prefix is gone
  (#7702)
- escaped special characters in content paths returned with http.Error
  making them both safer and easier to reason about (e.g. when invisible
  whitespace Unicode is used)
  • Loading branch information
lidel committed Mar 17, 2022
1 parent 6774ef9 commit 4cabdfe
Show file tree
Hide file tree
Showing 14 changed files with 992 additions and 404 deletions.
474 changes: 166 additions & 308 deletions core/corehttp/gateway_handler.go

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions core/corehttp/gateway_handler_block.go
@@ -0,0 +1,38 @@
package corehttp

import (
"bytes"
"io/ioutil"
"net/http"

cid "github.com/ipfs/go-cid"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
)

// serveRawBlock writes the bytes of a single raw block to the client.
//
// The block is fetched via contentPath, read fully into memory and offered
// as a "<blockCid>.bin" attachment with the application/vnd.ipld.raw
// content type. A failure to fetch or read the block is reported through
// webError with HTTP 500.
func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) {
	// Fetching and reading the block report failures in exactly the same way.
	fail := func(cause error) {
		webError(w, "ipfs block get "+blockCid.String(), cause, http.StatusInternalServerError)
	}

	rd, err := i.api.Block().Get(r.Context(), contentPath)
	if err != nil {
		fail(err)
		return
	}
	raw, err := ioutil.ReadAll(rd)
	if err != nil {
		fail(err)
		return
	}

	// Content-Disposition: suggest "<cid>.bin" as the download filename.
	filename := blockCid.String() + ".bin"
	setContentDispositionHeader(w, filename, "attachment")

	// Cache-related headers first, then the content type.
	lastModified := addCacheControlHeaders(w, r, contentPath, blockCid)
	hdr := w.Header()
	hdr.Set("Content-Type", "application/vnd.ipld.raw")
	hdr.Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

	// http.ServeContent takes care of If-None-Match+Etag, Content-Length
	// and range requests; it needs a seekable reader, hence bytes.Reader.
	http.ServeContent(w, r, filename, lastModified, bytes.NewReader(raw))
}
72 changes: 72 additions & 0 deletions core/corehttp/gateway_handler_car.go
@@ -0,0 +1,72 @@
package corehttp

import (
"context"
"net/http"

blocks "github.com/ipfs/go-block-format"
cid "github.com/ipfs/go-cid"
coreiface "github.com/ipfs/interface-go-ipfs-core"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
gocar "github.com/ipld/go-car"
selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
)

// serveCar streams the DAG rooted at rootCid to the client as a CAR archive.
//
// The response is offered as a "<rootCid>.car" attachment and carries a weak
// Etag. A request whose If-None-Match header equals that Etag is answered
// with 304 Not Modified and no body. Range requests are not supported.
//
// The CAR is written directly to w while the DAG is traversed, so an error
// can surface after the status line and part of the body were already sent.
// Such an error is reported in the X-Stream-Error HTTP trailer, which is
// declared up front via the Trailer header.
func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) {
	ctx, cancel := context.WithCancel(r.Context())
	defer cancel()

	// Set Content-Disposition
	name := rootCid.String() + ".car"
	setContentDispositionHeader(w, name, "attachment")

	// Weak Etag W/ because we can't guarantee byte-for-byte identical responses
	// (CAR is streamed, and in theory, blocks may arrive from datastore in non-deterministic order)
	etag := `W/` + getEtag(r, rootCid)
	w.Header().Set("Etag", etag)

	// Finish early if Etag match
	if r.Header.Get("If-None-Match") == etag {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	// Make it clear we don't support range-requests over a car stream
	// Partial downloads and resumes should be handled using
	// IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769
	w.Header().Set("Accept-Ranges", "none")

	// Explicit Cache-Control to ensure fresh stream on retry.
	// CAR stream could be interrupted, and client should be able to resume and get full response, not the truncated one
	w.Header().Set("Cache-Control", "no-cache, no-transform")

	w.Header().Set("Content-Type", "application/vnd.ipld.car; version=1")
	w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

	// Declare the error trailer before any body byte is written. net/http
	// only emits a header set after the body has started when its name was
	// announced in "Trailer" beforehand; otherwise the value is silently
	// dropped. Add (not Set) keeps trailers declared by outer handlers.
	w.Header().Add("Trailer", "X-Stream-Error")

	// Same go-car settings as dag.export command
	store := dagStore{dag: i.api.Dag(), ctx: ctx}

	// TODO: support selectors passed as request param: https://github.com/ipfs/go-ipfs/issues/8769
	dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively}
	car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce())

	if err := car.Write(w); err != nil {
		// We return error as a trailer, however it is not something browsers can access
		// (https://github.com/mdn/browser-compat-data/issues/14703)
		// Due to this, we suggest client always verify that
		// the received CAR stream response is matching requested DAG selector
		w.Header().Set("X-Stream-Error", err.Error())
		return
	}
}

// dagStore binds a DAG service to a fixed context so that nodes can be
// looked up by CID alone. serveCar hands it to go-car as the block source.
type dagStore struct {
	dag coreiface.APIDagService
	ctx context.Context
}

// Get returns the node identified by c, fetched from the wrapped DAG service
// using the context stored in the dagStore.
func (s dagStore) Get(c cid.Cid) (blocks.Block, error) {
	return s.dag.Get(s.ctx, c)
}
37 changes: 37 additions & 0 deletions core/corehttp/gateway_handler_unixfs.go
@@ -0,0 +1,37 @@
package corehttp

import (
"fmt"
"html"
"net/http"

files "github.com/ipfs/go-ipfs-files"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"go.uber.org/zap"
)

// serveUnixFs fetches the UnixFS node behind resolvedPath and dispatches it
// to the matching responder: serveFile for files, serveDirectory for
// directories. Any other node type is reported as an internal error.
// A failure to fetch the node is reported as 404 Not Found.
func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, logger *zap.SugaredLogger) {
	node, err := i.api.Unixfs().Get(r.Context(), resolvedPath)
	if err != nil {
		webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusNotFound)
		return
	}
	// The node stays open for the whole response and is released on return.
	defer node.Close()

	// File is checked before Directory, in that order.
	switch n := node.(type) {
	case files.File:
		logger.Debugw("serving unixfs file", "path", contentPath)
		i.serveFile(w, r, contentPath, resolvedPath.Cid(), n)
	case files.Directory:
		logger.Debugw("serving unixfs directory", "path", contentPath)
		i.serveDirectory(w, r, resolvedPath, contentPath, n, logger)
	default:
		internalWebError(w, fmt.Errorf("unsupported UnixFs type"))
	}
}
197 changes: 197 additions & 0 deletions core/corehttp/gateway_handler_unixfs_dir.go
@@ -0,0 +1,197 @@
package corehttp

import (
"net/http"
"net/url"
gopath "path"
"strings"

"github.com/dustin/go-humanize"
files "github.com/ipfs/go-ipfs-files"
"github.com/ipfs/go-ipfs/assets"
path "github.com/ipfs/go-path"
"github.com/ipfs/go-path/resolver"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"go.uber.org/zap"
)

// serveDirectory returns the best representation of UnixFS directory
//
// It will return index.html if present, or generate directory listing otherwise.
//
// When index.html exists and the content path lacks a trailing slash, the
// client is first redirected (302) to the slash-terminated URL, unless the
// request carries ?go-get=1. The generated listing gets its own Etag when
// assets.BindataVersionHash is set, and a matching If-None-Match is answered
// with 304. HEAD requests receive headers only.
func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, logger *zap.SugaredLogger) {

	// HostnameOption might have constructed an IPNS/IPFS path using the Host header.
	// In this case, we need the original path for constructing redirects
	// and links that match the requested URL.
	// For example, http://example.net would become /ipns/example.net, and
	// the redirects and links would end up as http://example.net/ipns/example.net
	requestURI, err := url.ParseRequestURI(r.RequestURI)
	if err != nil {
		webError(w, "failed to parse request path", err, http.StatusInternalServerError)
		return
	}
	originalURLPath := requestURI.Path

	// Check if directory has index.html, if so, serveFile
	idxPath := ipath.Join(resolvedPath, "index.html")
	idx, err := i.api.Unixfs().Get(r.Context(), idxPath)
	switch err.(type) {
	case nil:
		// Release the index.html node on every exit from this branch
		// (redirect, type mismatch, or after the file was served), the same
		// way serveUnixFs closes the node it fetched.
		if idx != nil {
			defer idx.Close()
		}

		cpath := contentPath.String()
		// HasSuffix instead of indexing the last byte: no panic on an empty path.
		dirwithoutslash := !strings.HasSuffix(cpath, "/")
		goget := r.URL.Query().Get("go-get") == "1"
		if dirwithoutslash && !goget {
			// Redirect is built from originalURLPath so it matches the requested URL.
			suffix := "/"
			if r.URL.RawQuery != "" {
				// preserve query parameters
				suffix = suffix + "?" + r.URL.RawQuery
			}

			redirectURL := originalURLPath + suffix
			logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath)
			http.Redirect(w, r, redirectURL, http.StatusFound)
			return
		}

		f, ok := idx.(files.File)
		if !ok {
			internalWebError(w, files.ErrNotReader)
			return
		}

		logger.Debugw("serving index.html file", "path", idxPath)
		// write to request
		i.serveFile(w, r, idxPath, resolvedPath.Cid(), f)
		return
	case resolver.ErrNoLink:
		logger.Debugw("no index.html; noop", "path", idxPath)
	default:
		internalWebError(w, err)
		return
	}

	// See statusResponseWriter.WriteHeader
	// and https://github.com/ipfs/go-ipfs/issues/7164
	// Note: this needs to occur before listingTemplate.Execute otherwise we get
	// superfluous response.WriteHeader call from prometheus/client_golang
	if w.Header().Get("Location") != "" {
		logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently)
		w.WriteHeader(http.StatusMovedPermanently)
		return
	}

	// A HTML directory index will be presented, be sure to set the correct
	// type instead of relying on autodetection (which may fail).
	w.Header().Set("Content-Type", "text/html")

	// Generated dir index requires custom Etag (it may change between go-ipfs versions)
	if assets.BindataVersionHash != "" {
		dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"`
		w.Header().Set("Etag", dirEtag)
		if r.Header.Get("If-None-Match") == dirEtag {
			w.WriteHeader(http.StatusNotModified)
			return
		}
	}

	if r.Method == http.MethodHead {
		logger.Debug("return as request's HTTP method is HEAD")
		return
	}

	// storage for directory listing
	var dirListing []directoryItem
	dirit := dir.Entries()
	for dirit.Next() {
		size := "?"
		if s, err := dirit.Node().Size(); err == nil {
			// Size may not be defined/supported. Continue anyways.
			size = humanize.Bytes(uint64(s))
		}

		resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name()))
		if err != nil {
			internalWebError(w, err)
			return
		}
		hash := resolved.Cid().String()

		// Entry links are built from originalURLPath so they match the requested URL.
		di := directoryItem{
			Size:      size,
			Name:      dirit.Name(),
			Path:      gopath.Join(originalURLPath, dirit.Name()),
			Hash:      hash,
			ShortHash: shortHash(hash),
		}
		dirListing = append(dirListing, di)
	}
	if dirit.Err() != nil {
		internalWebError(w, dirit.Err())
		return
	}

	// construct the correct back link
	// https://github.com/ipfs/go-ipfs/issues/1365
	backLink := originalURLPath

	// don't go further up than /ipfs/$hash/
	pathSplit := path.SplitList(contentPath.String())
	atRoot := len(pathSplit) == 3 || // url: /ipfs/$hash
		(len(pathSplit) == 4 && pathSplit[3] == "") // url: /ipfs/$hash/
	if !atRoot {
		// add the correct link depending on whether the path ends with a slash
		if strings.HasSuffix(backLink, "/") {
			backLink += "./.."
		} else {
			backLink += "/.."
		}
	}

	size := "?"
	if s, err := dir.Size(); err == nil {
		// Size may not be defined/supported. Continue anyways.
		size = humanize.Bytes(uint64(s))
	}

	hash := resolvedPath.Cid().String()

	// Gateway root URL to be used when linking to other rootIDs.
	// This will be blank unless subdomain or DNSLink resolution is being used
	// for this request.
	var gwURL string

	// Get gateway hostname and build gateway URL.
	if h, ok := r.Context().Value("gw-hostname").(string); ok {
		gwURL = "//" + h
	}

	dnslink := hasDNSLinkOrigin(gwURL, contentPath.String())

	tplData := listingTemplateData{
		GatewayURL:  gwURL,
		DNSLink:     dnslink,
		Listing:     dirListing,
		Size:        size,
		Path:        contentPath.String(),
		Breadcrumbs: breadcrumbs(contentPath.String(), dnslink),
		BackLink:    backLink,
		Hash:        hash,
	}

	logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash)

	if err := listingTemplate.Execute(w, tplData); err != nil {
		internalWebError(w, err)
		return
	}
}

0 comments on commit 4cabdfe

Please sign in to comment.