From d9673097fab155dd7a9192d813d6b192642c7b77 Mon Sep 17 00:00:00 2001 From: Manuel Alonso Date: Tue, 15 Feb 2022 23:13:09 +0100 Subject: [PATCH 01/20] chore(gateway): debug logging for the http requests (#8518) * chore(gateway): better logging for the http requests * chore(gateway): removed defer and add more data to the final log * chore(gateway): debug logging refactor * chore(gateway): use debug w/o context when only msg * doc: add cmd for log level * chore: add more logs and address fedback * chore(gateway): log subdomains and from=requestURI, refactor * chore(gateway): fix debug redirect This commit was moved from ipfs/kubo@edb32ac3d743404118834f8c371a3fdf45c2ea66 --- gateway/core/corehttp/gateway_handler.go | 45 +++++++++++++++++++----- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index f5ee54d8c..1262101be 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -82,6 +82,7 @@ func (sw *statusResponseWriter) WriteHeader(code int) { redirect := sw.ResponseWriter.Header().Get("Location") if redirect != "" && code == http.StatusOK { code = http.StatusMovedPermanently + log.Debugw("subdomain redirect", "location", redirect, "status", code) } sw.ResponseWriter.WriteHeader(code) } @@ -198,6 +199,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request urlPath := r.URL.Path escapedURLPath := r.URL.EscapedPath() + logger := log.With("from", r.RequestURI) + logger.Debug("http request received") + // If the gateway is behind a reverse proxy and mounted at a sub-path, // the prefix header can be set to signal this sub-path. // It will be prepended to links in directory listings and the index.html redirect. 
@@ -210,6 +214,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request break } } + logger.Debugw("sub-path (deprecrated)", "prefix", prefix) } // HostnameOption might have constructed an IPNS/IPFS path using the Host header. @@ -242,7 +247,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request if u.RawQuery != "" { // preserve query if present path = path + "?" + u.RawQuery } - http.Redirect(w, r, gopath.Join("/", prefix, u.Scheme, u.Host, path), http.StatusMovedPermanently) + + redirectURL := gopath.Join("/", prefix, u.Scheme, u.Host, path) + logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) return } @@ -263,6 +271,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request if prefix == "" && fixupSuperfluousNamespace(w, urlPath, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix // by returning error/redirect page, nothing left to do here + logger.Debugw("redundant namespace; noop") return } // unable to fix path, returning error @@ -279,6 +288,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return default: if i.servePretty404IfPresent(w, r, parsedPath) { + logger.Debugw("serve pretty 404 if present") return } @@ -345,6 +355,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } else { name = getFilename(urlPath) } + + logger.Debugw("serving file", "name", name) i.serveFile(w, r, name, modtime, f) return } @@ -354,7 +366,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - idx, err := i.api.Unixfs().Get(r.Context(), ipath.Join(resolvedPath, "index.html")) + idxPath := ipath.Join(resolvedPath, "index.html") + idx, err := i.api.Unixfs().Get(r.Context(), idxPath) switch err.(type) { case nil: dirwithoutslash := 
urlPath[len(urlPath)-1] != '/' @@ -366,7 +379,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // preserve query parameters suffix = suffix + "?" + r.URL.RawQuery } - http.Redirect(w, r, originalUrlPath+suffix, 302) + + redirectURL := originalUrlPath + suffix + logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) + http.Redirect(w, r, redirectURL, http.StatusFound) return } @@ -376,11 +392,12 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + logger.Debugw("serving index.html file", "path", idxPath) // write to request i.serveFile(w, r, "index.html", modtime, f) return case resolver.ErrNoLink: - // no index.html; noop + logger.Debugw("no index.html; noop", "path", idxPath) default: internalWebError(w, err) return @@ -391,6 +408,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // Note: this needs to occur before listingTemplate.Execute otherwise we get // superfluous response.WriteHeader call from prometheus/client_golang if w.Header().Get("Location") != "" { + logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently) w.WriteHeader(http.StatusMovedPermanently) return } @@ -399,6 +417,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // type instead of relying on autodetection (which may fail). 
w.Header().Set("Content-Type", "text/html") if r.Method == http.MethodHead { + logger.Debug("return as request's HTTP method is HEAD") return } @@ -490,8 +509,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request Hash: hash, } - err = listingTemplate.Execute(w, tplData) - if err != nil { + logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash, "duration", time.Since(begin)) + + if err := listingTemplate.Execute(w, tplData); err != nil { internalWebError(w, err) return } @@ -568,7 +588,7 @@ func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http. return false } - log.Debugf("using pretty 404 file for %s", parsedPath.String()) + log.Debugw("using pretty 404 file", "path", parsedPath) w.Header().Set("Content-Type", ctype) w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) w.WriteHeader(http.StatusNotFound) @@ -585,6 +605,7 @@ func (i *gatewayHandler) postHandler(w http.ResponseWriter, r *http.Request) { i.addUserHeaders(w) // ok, _now_ write user's headers. w.Header().Set("IPFS-Hash", p.Cid().String()) + log.Debugw("CID created, http redirect", "from", r.URL, "to", p, "status", http.StatusCreated) http.Redirect(w, r, p.String(), http.StatusCreated) } @@ -677,7 +698,10 @@ func (i *gatewayHandler) putHandler(w http.ResponseWriter, r *http.Request) { i.addUserHeaders(w) // ok, _now_ write user's headers. 
w.Header().Set("IPFS-Hash", newcid.String()) - http.Redirect(w, r, gopath.Join(ipfsPathPrefix, newcid.String(), newPath), http.StatusCreated) + + redirectURL := gopath.Join(ipfsPathPrefix, newcid.String(), newPath) + log.Debugw("CID replaced, redirect", "from", r.URL, "to", redirectURL, "status", http.StatusCreated) + http.Redirect(w, r, redirectURL, http.StatusCreated) } func (i *gatewayHandler) deleteHandler(w http.ResponseWriter, r *http.Request) { @@ -748,8 +772,11 @@ func (i *gatewayHandler) deleteHandler(w http.ResponseWriter, r *http.Request) { i.addUserHeaders(w) // ok, _now_ write user's headers. w.Header().Set("IPFS-Hash", ncid.String()) + + redirectURL := gopath.Join(ipfsPathPrefix+ncid.String(), directory) // note: StatusCreated is technically correct here as we created a new resource. - http.Redirect(w, r, gopath.Join(ipfsPathPrefix+ncid.String(), directory), http.StatusCreated) + log.Debugw("CID deleted, redirect", "from", r.RequestURI, "to", redirectURL, "status", http.StatusCreated) + http.Redirect(w, r, redirectURL, http.StatusCreated) } func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { From 085c3fd228ca208115fa58a02704e903a2f2848a Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 1 Mar 2022 18:04:31 +0100 Subject: [PATCH 02/20] feat: X-Ipfs-Roots for smarter HTTP caches (#8720) This commit was moved from ipfs/kubo@caba3b264340d77f0848bd5362472822e95ea101 --- gateway/core/corehttp/gateway_handler.go | 54 ++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 1262101be..d6e45ba92 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -327,6 +327,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request w.Header().Set("X-IPFS-Path", urlPath) w.Header().Set("Etag", responseEtag) + if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { + 
w.Header().Set("X-Ipfs-Roots", rootCids) + } else { // this should never happen, as we resolved the urlPath already + webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + return + } + // set these headers _after_ the error, for we may just not have it // and don't want the client to cache a 500 response... // and only if it's /ipfs! @@ -391,6 +398,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request internalWebError(w, files.ErrNotReader) return } + // static index.html → no need to generate dynamic dir-index-html + // replace mutable DirIndex Etag with immutable dir CID + w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`) logger.Debugw("serving index.html file", "path", idxPath) // write to request @@ -785,6 +795,50 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { } } +// Set X-Ipfs-Roots with logical CID array for efficient HTTP cache invalidation. +func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) { + /* + These are logical roots where each CID represent one path segment + and resolves to either a directory or the root block of a file. + The main purpose of this header is allow HTTP caches to do smarter decisions + around cache invalidation (eg. 
keep specific subdirectory/file if it did not change) + + A good example is Wikipedia, which is HAMT-sharded, but we only care about + logical roots that represent each segment of the human-readable content + path: + + Given contentPath = /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey + rootCidList is generated by doing `ipfs resolve -r` on each sub path: + /ipns/en.wikipedia-on-ipfs.org → bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze + /ipns/en.wikipedia-on-ipfs.org/wiki/ → bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4 + /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey → bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma + + The result is an ordered array of values: + X-Ipfs-Roots: bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze,bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4,bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma + + Note that while the top one will change every time any article is changed, + the last root (responsible for specific article) may not change at all. 
+ */ + var sp strings.Builder + var pathRoots []string + pathSegments := strings.Split(contentPath[6:], "/") + sp.WriteString(contentPath[:5]) // /ipfs or /ipns + for _, root := range pathSegments { + if root == "" { + continue + } + sp.WriteString("/") + sp.WriteString(root) + resolvedSubPath, err := i.api.ResolvePath(r.Context(), ipath.New(sp.String())) + if err != nil { + return "", err + } + pathRoots = append(pathRoots, resolvedSubPath.Cid().String()) + } + rootCidList := strings.Join(pathRoots, ",") // convention from rfc2616#sec4.2 + return rootCidList, nil +} + func webError(w http.ResponseWriter, message string, err error, defaultCode int) { if _, ok := err.(resolver.ErrNoLink); ok { webErrorWithCode(w, message, err, http.StatusNotFound) From de094cb5a2060c6b69a684ba02583ca0c05db79e Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 1 Mar 2022 19:03:06 +0100 Subject: [PATCH 03/20] feat: ipfs-webui v2.15 (#8712) Release Notes: https://github.com/ipfs/ipfs-webui/releases/tag/v2.15.0 This commit was moved from ipfs/kubo@d5ad847e05865e81957c43f526600860c06dbb84 --- gateway/core/corehttp/webui.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gateway/core/corehttp/webui.go b/gateway/core/corehttp/webui.go index 72656751a..0ed60f760 100644 --- a/gateway/core/corehttp/webui.go +++ b/gateway/core/corehttp/webui.go @@ -1,11 +1,12 @@ package corehttp // TODO: move to IPNS -const WebUIPath = "/ipfs/bafybeihcyruaeza7uyjd6ugicbcrqumejf6uf353e5etdkhotqffwtguva" // v2.13.0 +const WebUIPath = "/ipfs/bafybeiednzu62vskme5wpoj4bjjikeg3xovfpp4t7vxk5ty2jxdi4mv4bu" // v2.15.0 // this is a list of all past webUI paths. 
var WebUIPaths = []string{ WebUIPath, + "/ipfs/bafybeihcyruaeza7uyjd6ugicbcrqumejf6uf353e5etdkhotqffwtguva", "/ipfs/bafybeiflkjt66aetfgcrgvv75izymd5kc47g6luepqmfq6zsf5w6ueth6y", "/ipfs/bafybeid26vjplsejg7t3nrh7mxmiaaxriebbm4xxrxxdunlk7o337m5sqq", "/ipfs/bafybeif4zkmu7qdhkpf3pnhwxipylqleof7rl6ojbe7mq3fzogz6m4xk3i", From 10692b9b4f19bb2edabd2b6d203ed9345cc6f7c3 Mon Sep 17 00:00:00 2001 From: Laurent Senta Date: Wed, 2 Mar 2022 14:48:24 +0100 Subject: [PATCH 04/20] fix: rewrite dependencies over the go-ipfs-config package This commit was moved from ipfs/kubo@8d549f03f3e02ef6c5efad11c9aab969fc6861ed --- gateway/core/corehttp/commands.go | 2 +- gateway/core/corehttp/gateway_test.go | 2 +- gateway/core/corehttp/hostname.go | 2 +- gateway/core/corehttp/hostname_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gateway/core/corehttp/commands.go b/gateway/core/corehttp/commands.go index c5443f6eb..8de1e6be4 100644 --- a/gateway/core/corehttp/commands.go +++ b/gateway/core/corehttp/commands.go @@ -16,7 +16,7 @@ import ( cmds "github.com/ipfs/go-ipfs-cmds" cmdsHttp "github.com/ipfs/go-ipfs-cmds/http" - config "github.com/ipfs/go-ipfs-config" + config "github.com/ipfs/go-ipfs/config" path "github.com/ipfs/go-path" ) diff --git a/gateway/core/corehttp/gateway_test.go b/gateway/core/corehttp/gateway_test.go index 8cccde0e2..ae0104217 100644 --- a/gateway/core/corehttp/gateway_test.go +++ b/gateway/core/corehttp/gateway_test.go @@ -19,8 +19,8 @@ import ( datastore "github.com/ipfs/go-datastore" syncds "github.com/ipfs/go-datastore/sync" - config "github.com/ipfs/go-ipfs-config" files "github.com/ipfs/go-ipfs-files" + config "github.com/ipfs/go-ipfs/config" path "github.com/ipfs/go-path" iface "github.com/ipfs/interface-go-ipfs-core" nsopts "github.com/ipfs/interface-go-ipfs-core/options/namesys" diff --git a/gateway/core/corehttp/hostname.go b/gateway/core/corehttp/hostname.go index 57c2c2191..6c0ad5bca 100644 --- a/gateway/core/corehttp/hostname.go +++ 
b/gateway/core/corehttp/hostname.go @@ -18,7 +18,7 @@ import ( mbase "github.com/multiformats/go-multibase" - config "github.com/ipfs/go-ipfs-config" + config "github.com/ipfs/go-ipfs/config" iface "github.com/ipfs/interface-go-ipfs-core" options "github.com/ipfs/interface-go-ipfs-core/options" nsopts "github.com/ipfs/interface-go-ipfs-core/options/namesys" diff --git a/gateway/core/corehttp/hostname_test.go b/gateway/core/corehttp/hostname_test.go index f7ba89a8c..df0f4f229 100644 --- a/gateway/core/corehttp/hostname_test.go +++ b/gateway/core/corehttp/hostname_test.go @@ -7,8 +7,8 @@ import ( "testing" cid "github.com/ipfs/go-cid" - config "github.com/ipfs/go-ipfs-config" files "github.com/ipfs/go-ipfs-files" + config "github.com/ipfs/go-ipfs/config" coreapi "github.com/ipfs/go-ipfs/core/coreapi" path "github.com/ipfs/go-path" ) From 20807cdc957b69f802213917b391d72ac6d8a2ee Mon Sep 17 00:00:00 2001 From: Gus Eggert Date: Fri, 11 Mar 2022 14:32:59 -0500 Subject: [PATCH 05/20] feat: add endpoint for enabling block profiling (#8469) This commit was moved from ipfs/kubo@0487f03eaea8f0207e2dec65809bc678813a1ec3 --- gateway/core/corehttp/mutex_profile.go | 35 ++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/gateway/core/corehttp/mutex_profile.go b/gateway/core/corehttp/mutex_profile.go index fbb23340d..a8265326c 100644 --- a/gateway/core/corehttp/mutex_profile.go +++ b/gateway/core/corehttp/mutex_profile.go @@ -41,3 +41,38 @@ func MutexFractionOption(path string) ServeOption { return mux, nil } } + +// BlockProfileRateOption allows to set runtime.SetBlockProfileRate via HTTP +// using POST request with parameter 'rate'. +// The profiler tries to sample 1 event every 'rate' nanoseconds. +// If rate == 1, then the profiler samples every blocking event. +// To disable, set rate = 0. 
+func BlockProfileRateOption(path string) ServeOption { + return func(_ *core.IpfsNode, _ net.Listener, mux *http.ServeMux) (*http.ServeMux, error) { + mux.HandleFunc(path, func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "only POST allowed", http.StatusMethodNotAllowed) + return + } + if err := r.ParseForm(); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + rateStr := r.Form.Get("rate") + if len(rateStr) == 0 { + http.Error(w, "parameter 'rate' must be set", http.StatusBadRequest) + return + } + + rate, err := strconv.Atoi(rateStr) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + log.Infof("Setting BlockProfileRate to %d", rate) + runtime.SetBlockProfileRate(rate) + }) + return mux, nil + } +} From 8fc128f6808225938c182212f72342771fe4dc11 Mon Sep 17 00:00:00 2001 From: Dave Justice Date: Wed, 16 Mar 2022 19:07:52 -0400 Subject: [PATCH 06/20] fix: allow ipfs-companion browser extension to access RPC API (#8690) * fix: add companion ids to allow origins - fixes #8689 - Adds the chrome-extension ids for ipfs-companion and ipfs-companion-beta to the allowed origins list, this allows us to access ipfs api from a manifest v3 extension. 
- added tests in t0401-api-browser-security.sh * fix: companion when custom CORS *-Origin is set Companion extension should be able to access RPC API even when custom Access-Control-Allow-Origin is set Co-authored-by: Marcin Rataj This commit was moved from ipfs/kubo@6774ef9dfdd5aa1e7b34cdd048cb8efedee4e305 --- gateway/core/corehttp/commands.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/gateway/core/corehttp/commands.go b/gateway/core/corehttp/commands.go index 8de1e6be4..14b503ff5 100644 --- a/gateway/core/corehttp/commands.go +++ b/gateway/core/corehttp/commands.go @@ -46,6 +46,11 @@ var defaultLocalhostOrigins = []string{ "https://localhost:", } +var companionBrowserExtensionOrigins = []string{ + "chrome-extension://nibjojkomfdiaoajekhjakgkdhaomnch", // ipfs-companion + "chrome-extension://hjoieblefckbooibpepigmacodalfndh", // ipfs-companion-beta +} + func addCORSFromEnv(c *cmdsHttp.ServerConfig) { origin := os.Getenv(originEnvKey) if origin != "" { @@ -84,10 +89,9 @@ func addHeadersFromConfig(c *cmdsHttp.ServerConfig, nc *config.Config) { } func addCORSDefaults(c *cmdsHttp.ServerConfig) { - // by default use localhost origins - if len(c.AllowedOrigins()) == 0 { - c.SetAllowedOrigins(defaultLocalhostOrigins...) - } + // always safelist certain origins + c.AppendAllowedOrigins(defaultLocalhostOrigins...) + c.AppendAllowedOrigins(companionBrowserExtensionOrigins...) 
// by default, use GET, PUT, POST if len(c.AllowedMethods()) == 0 { From 3a09cab756dc5133c1d4d3319a59ddaafebc106a Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 17 Mar 2022 17:15:24 +0100 Subject: [PATCH 07/20] feat(gateway): Block and CAR response formats (#8758) * feat: serveRawBlock implements ?format=block * feat: serveCar implements ?format=car * feat(gw): ?format= or Accept HTTP header - extracted file-like content type responses to separate .go files - Accept HTTP header with support for application/vnd.ipld.* types * fix: use .bin for raw block content-disposition .raw may be handled by something, depending on OS, and .bin seems to be universally "binary file" across all systems: https://en.wikipedia.org/wiki/List_of_filename_extensions_(A%E2%80%93E) * refactor: gateway_handler_unixfs.go - Moved UnixFS response handling to gateway_handler_unixfs*.go files. - Removed support for X-Ipfs-Gateway-Prefix (Closes #7702) * refactor: prefix cleanup and readable paths - removed dead code after X-Ipfs-Gateway-Prefix is gone (https://github.com/ipfs/go-ipfs/issues/7702) - escaped special characters in content paths returned with http.Error making them both safer and easier to reason about (e.g. 
when invisible whitespace Unicode is used) This commit was moved from ipfs/kubo@4cabdfefbf9b5d13e5064cedab37b01af18d78b5 --- gateway/core/corehttp/gateway_handler.go | 474 ++++++------------ .../core/corehttp/gateway_handler_block.go | 38 ++ gateway/core/corehttp/gateway_handler_car.go | 72 +++ .../core/corehttp/gateway_handler_unixfs.go | 37 ++ .../corehttp/gateway_handler_unixfs_dir.go | 197 ++++++++ .../corehttp/gateway_handler_unixfs_file.go | 83 +++ gateway/core/corehttp/gateway_test.go | 90 +--- 7 files changed, 596 insertions(+), 395 deletions(-) create mode 100644 gateway/core/corehttp/gateway_handler_block.go create mode 100644 gateway/core/corehttp/gateway_handler_car.go create mode 100644 gateway/core/corehttp/gateway_handler_unixfs.go create mode 100644 gateway/core/corehttp/gateway_handler_unixfs_dir.go create mode 100644 gateway/core/corehttp/gateway_handler_unixfs_file.go diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index d6e45ba92..45356271d 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -5,7 +5,6 @@ import ( "fmt" "html/template" "io" - "mime" "net/http" "net/url" "os" @@ -16,11 +15,8 @@ import ( "strings" "time" - humanize "github.com/dustin/go-humanize" - "github.com/gabriel-vasile/mimetype" - "github.com/ipfs/go-cid" + cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" - assets "github.com/ipfs/go-ipfs/assets" dag "github.com/ipfs/go-merkledag" mfs "github.com/ipfs/go-mfs" path "github.com/ipfs/go-path" @@ -32,11 +28,13 @@ import ( ) const ( - ipfsPathPrefix = "/ipfs/" - ipnsPathPrefix = "/ipns/" + ipfsPathPrefix = "/ipfs/" + ipnsPathPrefix = "/ipns/" + immutableCacheControl = "public, max-age=29030400, immutable" ) var onlyAscii = regexp.MustCompile("[[:^ascii:]]") +var noModtime = time.Unix(0, 0) // disables Last-Modified header if passed as modtime // HTML-based redirect for errors which can be recovered from, but we 
want // to provide hint to people that they should fix things on their end. @@ -89,6 +87,7 @@ func (sw *statusResponseWriter) WriteHeader(code int) { func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler { unixfsGetMetric := prometheus.NewSummaryVec( + // TODO: deprecate and switch to content type agnostic metrics: https://github.com/ipfs/go-ipfs/issues/8441 prometheus.SummaryOpts{ Namespace: "ipfs", Subsystem: "http", @@ -196,38 +195,17 @@ func (i *gatewayHandler) optionsHandler(w http.ResponseWriter, r *http.Request) func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { begin := time.Now() - urlPath := r.URL.Path - escapedURLPath := r.URL.EscapedPath() logger := log.With("from", r.RequestURI) logger.Debug("http request received") - // If the gateway is behind a reverse proxy and mounted at a sub-path, - // the prefix header can be set to signal this sub-path. - // It will be prepended to links in directory listings and the index.html redirect. - // TODO: this feature is deprecated and will be removed (https://github.com/ipfs/go-ipfs/issues/7702) - prefix := "" - if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); len(prfx) > 0 { - for _, p := range i.config.PathPrefixes { - if prfx == p || strings.HasPrefix(prfx, p+"/") { - prefix = prfx - break - } - } - logger.Debugw("sub-path (deprecrated)", "prefix", prefix) - } - - // HostnameOption might have constructed an IPNS/IPFS path using the Host header. - // In this case, we need the original path for constructing redirects - // and links that match the requested URL. 
- // For example, http://example.net would become /ipns/example.net, and - // the redirects and links would end up as http://example.net/ipns/example.net - requestURI, err := url.ParseRequestURI(r.RequestURI) - if err != nil { - webError(w, "failed to parse request path", err, http.StatusInternalServerError) + // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) + // TODO: remove this after go-ipfs 0.13 ships + if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { + err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") + webError(w, "unsupported HTTP header", err, http.StatusBadRequest) return } - originalUrlPath := prefix + requestURI.Path // ?uri query param support for requests produced by web browsers // via navigator.registerProtocolHandler Web API @@ -248,7 +226,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request path = path + "?" + u.RawQuery } - redirectURL := gopath.Join("/", prefix, u.Scheme, u.Host, path) + redirectURL := gopath.Join("/", u.Scheme, u.Host, path) logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) return @@ -266,9 +244,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } - parsedPath := ipath.New(urlPath) - if pathErr := parsedPath.IsValid(); pathErr != nil { - if prefix == "" && fixupSuperfluousNamespace(w, urlPath, r.URL.RawQuery) { + contentPath := ipath.New(r.URL.Path) + if pathErr := contentPath.IsValid(); pathErr != nil { + if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix // by returning error/redirect page, nothing left to do here logger.Debugw("redundant namespace; noop") @@ -280,304 +258,75 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } // 
Resolve path to the final DAG node for the ETag - resolvedPath, err := i.api.ResolvePath(r.Context(), parsedPath) + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) switch err { case nil: case coreiface.ErrOffline: - webError(w, "ipfs resolve -r "+escapedURLPath, err, http.StatusServiceUnavailable) + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) return default: - if i.servePretty404IfPresent(w, r, parsedPath) { + // if Accept is text/html, see if ipfs-404.html is present + if i.servePretty404IfPresent(w, r, contentPath) { logger.Debugw("serve pretty 404 if present") return } - webError(w, "ipfs resolve -r "+escapedURLPath, err, http.StatusNotFound) - return - } - - dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) - if err != nil { - webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound) + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) return } - i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) - - defer dr.Close() - - var responseEtag string + // Detect when explicit Accept header or ?format parameter are present + responseFormat := customResponseFormat(r) - // we need to figure out whether this is a directory before doing most of the heavy lifting below - _, ok := dr.(files.Directory) - - if ok && assets.BindataVersionHash != "" { - responseEtag = `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` - } else { - responseEtag = `"` + resolvedPath.Cid().String() + `"` + // Finish early if client already has matching Etag + if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { + w.WriteHeader(http.StatusNotModified) + return } - // Check etag sent back to us - if r.Header.Get("If-None-Match") == responseEtag || r.Header.Get("If-None-Match") == `W/`+responseEtag { - w.WriteHeader(http.StatusNotModified) + // Update the global metric of the time it 
takes to read the final root block of the requested resource + // NOTE: for legacy reasons this happens before we go into content-type specific code paths + _, err = i.api.Block().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) return } + i.unixfsGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. - w.Header().Set("X-IPFS-Path", urlPath) - w.Header().Set("Etag", responseEtag) + w.Header().Set("X-Ipfs-Path", contentPath.String()) - if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { + if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { w.Header().Set("X-Ipfs-Roots", rootCids) - } else { // this should never happen, as we resolved the urlPath already + } else { // this should never happen, as we resolved the contentPath already webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) return } - // set these headers _after_ the error, for we may just not have it - // and don't want the client to cache a 500 response... - // and only if it's /ipfs! - // TODO: break this out when we split /ipfs /ipns routes. 
- modtime := time.Now() - - if f, ok := dr.(files.File); ok { - if strings.HasPrefix(urlPath, ipfsPathPrefix) { - w.Header().Set("Cache-Control", "public, max-age=29030400, immutable") - - // set modtime to a really long time ago, since files are immutable and should stay cached - modtime = time.Unix(1, 0) - } - - urlFilename := r.URL.Query().Get("filename") - var name string - if urlFilename != "" { - disposition := "inline" - if r.URL.Query().Get("download") == "true" { - disposition = "attachment" - } - utf8Name := url.PathEscape(urlFilename) - asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_")) - w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) - name = urlFilename - } else { - name = getFilename(urlPath) - } - - logger.Debugw("serving file", "name", name) - i.serveFile(w, r, name, modtime, f) + // Support custom response formats passed via ?format or Accept HTTP header + switch responseFormat { + case "": // The implicit response format is UnixFS + logger.Debugw("serving unixfs", "path", contentPath) + i.serveUnixFs(w, r, resolvedPath, contentPath, logger) return - } - dir, ok := dr.(files.Directory) - if !ok { - internalWebError(w, fmt.Errorf("unsupported file type")) + case "application/vnd.ipld.raw": + logger.Debugw("serving raw block", "path", contentPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath) return - } - - idxPath := ipath.Join(resolvedPath, "index.html") - idx, err := i.api.Unixfs().Get(r.Context(), idxPath) - switch err.(type) { - case nil: - dirwithoutslash := urlPath[len(urlPath)-1] != '/' - goget := r.URL.Query().Get("go-get") == "1" - if dirwithoutslash && !goget { - // See comment above where originalUrlPath is declared. - suffix := "/" - if r.URL.RawQuery != "" { - // preserve query parameters - suffix = suffix + "?" 
+ r.URL.RawQuery - } - - redirectURL := originalUrlPath + suffix - logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) - http.Redirect(w, r, redirectURL, http.StatusFound) - return - } - - f, ok := idx.(files.File) - if !ok { - internalWebError(w, files.ErrNotReader) - return - } - // static index.html → no need to generate dynamic dir-index-html - // replace mutable DirIndex Etag with immutable dir CID - w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`) - - logger.Debugw("serving index.html file", "path", idxPath) - // write to request - i.serveFile(w, r, "index.html", modtime, f) - return - case resolver.ErrNoLink: - logger.Debugw("no index.html; noop", "path", idxPath) - default: - internalWebError(w, err) - return - } - - // See statusResponseWriter.WriteHeader - // and https://github.com/ipfs/go-ipfs/issues/7164 - // Note: this needs to occur before listingTemplate.Execute otherwise we get - // superfluous response.WriteHeader call from prometheus/client_golang - if w.Header().Get("Location") != "" { - logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently) - w.WriteHeader(http.StatusMovedPermanently) + case "application/vnd.ipld.car", "application/vnd.ipld.car; version=1": + logger.Debugw("serving car stream", "path", contentPath) + i.serveCar(w, r, resolvedPath.Cid(), contentPath) return - } - - // A HTML directory index will be presented, be sure to set the correct - // type instead of relying on autodetection (which may fail). 
- w.Header().Set("Content-Type", "text/html") - if r.Method == http.MethodHead { - logger.Debug("return as request's HTTP method is HEAD") + default: // catch-all for unsuported application/vnd.* + err := fmt.Errorf("unsupported format %q", responseFormat) + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) return } - - // storage for directory listing - var dirListing []directoryItem - dirit := dir.Entries() - for dirit.Next() { - size := "?" - if s, err := dirit.Node().Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } - - resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())) - if err != nil { - internalWebError(w, err) - return - } - hash := resolved.Cid().String() - - // See comment above where originalUrlPath is declared. - di := directoryItem{ - Size: size, - Name: dirit.Name(), - Path: gopath.Join(originalUrlPath, dirit.Name()), - Hash: hash, - ShortHash: shortHash(hash), - } - dirListing = append(dirListing, di) - } - if dirit.Err() != nil { - internalWebError(w, dirit.Err()) - return - } - - // construct the correct back link - // https://github.com/ipfs/go-ipfs/issues/1365 - var backLink string = originalUrlPath - - // don't go further up than /ipfs/$hash/ - pathSplit := path.SplitList(urlPath) - switch { - // keep backlink - case len(pathSplit) == 3: // url: /ipfs/$hash - - // keep backlink - case len(pathSplit) == 4 && pathSplit[3] == "": // url: /ipfs/$hash/ - - // add the correct link depending on whether the path ends with a slash - default: - if strings.HasSuffix(backLink, "/") { - backLink += "./.." - } else { - backLink += "/.." - } - } - - size := "?" - if s, err := dir.Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } - - hash := resolvedPath.Cid().String() - - // Gateway root URL to be used when linking to other rootIDs. 
- // This will be blank unless subdomain or DNSLink resolution is being used - // for this request. - var gwURL string - - // Get gateway hostname and build gateway URL. - if h, ok := r.Context().Value("gw-hostname").(string); ok { - gwURL = "//" + h - } else { - gwURL = "" - } - - dnslink := hasDNSLinkOrigin(gwURL, urlPath) - - // See comment above where originalUrlPath is declared. - tplData := listingTemplateData{ - GatewayURL: gwURL, - DNSLink: dnslink, - Listing: dirListing, - Size: size, - Path: urlPath, - Breadcrumbs: breadcrumbs(urlPath, dnslink), - BackLink: backLink, - Hash: hash, - } - - logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash, "duration", time.Since(begin)) - - if err := listingTemplate.Execute(w, tplData); err != nil { - internalWebError(w, err) - return - } -} - -func (i *gatewayHandler) serveFile(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, file files.File) { - size, err := file.Size() - if err != nil { - http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) - return - } - - content := &lazySeeker{ - size: size, - reader: file, - } - - var ctype string - if _, isSymlink := file.(*files.Symlink); isSymlink { - // We should be smarter about resolving symlinks but this is the - // "most correct" we can be without doing that. - ctype = "inode/symlink" - } else { - ctype = mime.TypeByExtension(gopath.Ext(name)) - if ctype == "" { - // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. 
- // Fixes https://github.com/ipfs/go-ipfs/issues/7252 - mimeType, err := mimetype.DetectReader(content) - if err != nil { - http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) - return - } - - ctype = mimeType.String() - _, err = content.Seek(0, io.SeekStart) - if err != nil { - http.Error(w, "seeker can't seek", http.StatusInternalServerError) - return - } - } - // Strip the encoding from the HTML Content-Type header and let the - // browser figure it out. - // - // Fixes https://github.com/ipfs/go-ipfs/issues/2203 - if strings.HasPrefix(ctype, "text/html;") { - ctype = "text/html" - } - } - w.Header().Set("Content-Type", ctype) - - w = &statusResponseWriter{w} - http.ServeContent(w, req, name, modtime, content) } -func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { - resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) +func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) if err != nil { return false } @@ -598,7 +347,7 @@ func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http. 
return false } - log.Debugw("using pretty 404 file", "path", parsedPath) + log.Debugw("using pretty 404 file", "path", contentPath) w.Header().Set("Content-Type", ctype) w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) w.WriteHeader(http.StatusNotFound) @@ -795,6 +544,67 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { } } +func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) { + // Set Etag to based on CID (override whatever was set before) + w.Header().Set("Etag", getEtag(r, fileCid)) + + // Set Cache-Control and Last-Modified based on contentPath properties + if contentPath.Mutable() { + // mutable namespaces such as /ipns/ can't be cached forever + + /* For now we set Last-Modified to Now() to leverage caching heuristics built into modern browsers: + * https://github.com/ipfs/go-ipfs/pull/8074#pullrequestreview-645196768 + * but we should not set it to fake values and use Cache-Control based on TTL instead */ + modtime = time.Now() + + // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 + // TODO: set Last-Modified based on /ipns/ publishing timestamp? + + } else { + // immutable! CACHE ALL THE THINGS, FOREVER! wolololol + w.Header().Set("Cache-Control", immutableCacheControl) + + // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) + modtime = noModtime + + // TODO: set Last-Modified? - TBD - /ipfs/ modification metadata is present in unixfs 1.5 https://github.com/ipfs/go-ipfs/issues/6920? + } + + return modtime +} + +// Set Content-Disposition if filename URL query param is present, return preferred filename +func addContentDispositionHeader(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) string { + /* This logic enables: + * - creation of HTML links that trigger "Save As.." 
dialog instead of being rendered by the browser + * - overriding the filename used when saving subresource assets on HTML page + * - providing a default filename for HTTP clients when downloading direct /ipfs/CID without any subpath + */ + + // URL param ?filename=cat.jpg triggers Content-Disposition: [..] filename + // which impacts default name used in "Save As.." dialog + name := getFilename(contentPath) + urlFilename := r.URL.Query().Get("filename") + if urlFilename != "" { + disposition := "inline" + // URL param ?download=true triggers Content-Disposition: [..] attachment + // which skips rendering and forces "Save As.." dialog in browsers + if r.URL.Query().Get("download") == "true" { + disposition = "attachment" + } + setContentDispositionHeader(w, urlFilename, disposition) + name = urlFilename + } + return name +} + +// Set Content-Disposition to arbitrary filename and disposition +func setContentDispositionHeader(w http.ResponseWriter, filename string, disposition string) { + utf8Name := url.PathEscape(filename) + asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(filename, "_")) + w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) +} + // Set X-Ipfs-Roots with logical CID array for efficient HTTP cache invalidation. 
func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) { /* @@ -854,7 +664,7 @@ func webError(w http.ResponseWriter, message string, err error, defaultCode int) func webErrorWithCode(w http.ResponseWriter, message string, err error, code int) { http.Error(w, fmt.Sprintf("%s: %s", message, err), code) if code >= 500 { - log.Warnf("server error: %s: %s", err) + log.Warnf("server error: %s: %s", message, err) } } @@ -863,7 +673,8 @@ func internalWebError(w http.ResponseWriter, err error) { webErrorWithCode(w, "internalWebError", err, http.StatusInternalServerError) } -func getFilename(s string) string { +func getFilename(contentPath ipath.Path) string { + s := contentPath.String() if (strings.HasPrefix(s, ipfsPathPrefix) || strings.HasPrefix(s, ipnsPathPrefix)) && strings.Count(gopath.Clean(s), "/") <= 2 { // Don't want to treat ipfs.io in /ipns/ipfs.io as a filename. return "" @@ -871,13 +682,51 @@ func getFilename(s string) string { return gopath.Base(s) } -func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) { +// generate Etag value based on HTTP request and CID +func getEtag(r *http.Request, cid cid.Cid) string { + prefix := `"` + suffix := `"` + responseFormat := customResponseFormat(r) + if responseFormat != "" { + // application/vnd.ipld.foo → foo + f := responseFormat[strings.LastIndex(responseFormat, ".")+1:] + // Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses) + suffix = `.` + f + suffix + } + // TODO: include selector suffix when https://github.com/ipfs/go-ipfs/issues/8769 lands + return prefix + cid.String() + suffix +} + +// return explicit response format if specified in request as query parameter or via Accept HTTP header +func customResponseFormat(r *http.Request) string { + if formatParam := r.URL.Query().Get("format"); formatParam != "" { + // translate query param to a content type 
+ switch formatParam { + case "raw": + return "application/vnd.ipld.raw" + case "car": + return "application/vnd.ipld.car" + } + } + // Browsers and other user agents will send Accept header with generic types like: + // Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8 + // We only care about explciit, vendor-specific content-types. + for _, accept := range r.Header.Values("Accept") { + // respond to the very first ipld content type + if strings.HasPrefix(accept, "application/vnd.ipld") { + return accept + } + } + return "" +} + +func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { return nil, "", err } - pathComponents := strings.Split(parsedPath.String(), "/") + pathComponents := strings.Split(contentPath.String(), "/") for idx := len(pathComponents); idx >= 3; idx-- { pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) @@ -913,6 +762,15 @@ func preferred404Filename(acceptHeaders []string) (string, string, error) { return "", "", fmt.Errorf("there is no 404 file for the requested content types") } +// returns unquoted path with all special characters revealed as \u codes +func debugStr(path string) string { + q := fmt.Sprintf("%+q", path) + if len(q) >= 3 { + q = q[1 : len(q)-1] + } + return q +} + // Attempt to fix redundant /ipfs/ namespace as long as resulting // 'intended' path is valid. 
This is in case gremlins were tickled // wrong way and user ended up at /ipfs/ipfs/{cid} or /ipfs/ipns/{id} diff --git a/gateway/core/corehttp/gateway_handler_block.go b/gateway/core/corehttp/gateway_handler_block.go new file mode 100644 index 000000000..3b93851d2 --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_block.go @@ -0,0 +1,38 @@ +package corehttp + +import ( + "bytes" + "io/ioutil" + "net/http" + + cid "github.com/ipfs/go-cid" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveRawBlock returns bytes behind a raw block +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { + blockReader, err := i.api.Block().Get(r.Context(), contentPath) + if err != nil { + webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) + return + } + block, err := ioutil.ReadAll(blockReader) + if err != nil { + webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) + return + } + content := bytes.NewReader(block) + + // Set Content-Disposition + name := blockCid.String() + ".bin" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + modtime := addCacheControlHeaders(w, r, contentPath, blockCid) + w.Header().Set("Content-Type", "application/vnd.ipld.raw") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Done: http.ServeContent will take care of + // If-None-Match+Etag, Content-Length and range requests + http.ServeContent(w, r, name, modtime, content) +} diff --git a/gateway/core/corehttp/gateway_handler_car.go b/gateway/core/corehttp/gateway_handler_car.go new file mode 100644 index 000000000..43ce99eef --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_car.go @@ -0,0 +1,72 @@ +package corehttp + +import ( + "context" + "net/http" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + coreiface 
"github.com/ipfs/interface-go-ipfs-core" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + gocar "github.com/ipld/go-car" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" +) + +// serveCar returns a CAR stream for specific DAG+selector +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { + ctx, cancel := context.WithCancel(r.Context()) + defer cancel() + + // Set Content-Disposition + name := rootCid.String() + ".car" + setContentDispositionHeader(w, name, "attachment") + + // Weak Etag W/ because we can't guarantee byte-for-byte identical responses + // (CAR is streamed, and in theory, blocks may arrive from datastore in non-deterministic order) + etag := `W/` + getEtag(r, rootCid) + w.Header().Set("Etag", etag) + + // Finish early if Etag match + if r.Header.Get("If-None-Match") == etag { + w.WriteHeader(http.StatusNotModified) + return + } + + // Make it clear we don't support range-requests over a car stream + // Partial downloads and resumes should be handled using + // IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769 + w.Header().Set("Accept-Ranges", "none") + + // Explicit Cache-Control to ensure fresh stream on retry. 
+ // CAR stream could be interrupted, and client should be able to resume and get full response, not the truncated one + w.Header().Set("Cache-Control", "no-cache, no-transform") + + w.Header().Set("Content-Type", "application/vnd.ipld.car; version=1") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Same go-car settings as dag.export command + store := dagStore{dag: i.api.Dag(), ctx: ctx} + + // TODO: support selectors passed as request param: https://github.com/ipfs/go-ipfs/issues/8769 + dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} + car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) + + if err := car.Write(w); err != nil { + // We return error as a trailer, however it is not something browsers can access + // (https://github.com/mdn/browser-compat-data/issues/14703) + // Due to this, we suggest client always verify that + // the received CAR stream response is matching requested DAG selector + w.Header().Set("X-Stream-Error", err.Error()) + return + } +} + +type dagStore struct { + dag coreiface.APIDagService + ctx context.Context +} + +func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { + obj, err := ds.dag.Get(ds.ctx, c) + return obj, err +} diff --git a/gateway/core/corehttp/gateway_handler_unixfs.go b/gateway/core/corehttp/gateway_handler_unixfs.go new file mode 100644 index 000000000..6f476b2af --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_unixfs.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "fmt" + "html" + "net/http" + + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.uber.org/zap" +) + +func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, logger *zap.SugaredLogger) { + // Handling UnixFS + dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) + if err != nil { + 
webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusNotFound) + return + } + defer dr.Close() + + // Handling Unixfs file + if f, ok := dr.(files.File); ok { + logger.Debugw("serving unixfs file", "path", contentPath) + i.serveFile(w, r, contentPath, resolvedPath.Cid(), f) + return + } + + // Handling Unixfs directory + dir, ok := dr.(files.Directory) + if !ok { + internalWebError(w, fmt.Errorf("unsupported UnixFs type")) + return + } + logger.Debugw("serving unixfs directory", "path", contentPath) + i.serveDirectory(w, r, resolvedPath, contentPath, dir, logger) +} diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go new file mode 100644 index 000000000..8e7e131dd --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -0,0 +1,197 @@ +package corehttp + +import ( + "net/http" + "net/url" + gopath "path" + "strings" + + "github.com/dustin/go-humanize" + files "github.com/ipfs/go-ipfs-files" + "github.com/ipfs/go-ipfs/assets" + path "github.com/ipfs/go-path" + "github.com/ipfs/go-path/resolver" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.uber.org/zap" +) + +// serveDirectory returns the best representation of UnixFS directory +// +// It will return index.html if present, or generate directory listing otherwise. +func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, logger *zap.SugaredLogger) { + + // HostnameOption might have constructed an IPNS/IPFS path using the Host header. + // In this case, we need the original path for constructing redirects + // and links that match the requested URL. 
+ // For example, http://example.net would become /ipns/example.net, and + // the redirects and links would end up as http://example.net/ipns/example.net + requestURI, err := url.ParseRequestURI(r.RequestURI) + if err != nil { + webError(w, "failed to parse request path", err, http.StatusInternalServerError) + return + } + originalUrlPath := requestURI.Path + + // Check if directory has index.html, if so, serveFile + idxPath := ipath.Join(resolvedPath, "index.html") + idx, err := i.api.Unixfs().Get(r.Context(), idxPath) + switch err.(type) { + case nil: + cpath := contentPath.String() + dirwithoutslash := cpath[len(cpath)-1] != '/' + goget := r.URL.Query().Get("go-get") == "1" + if dirwithoutslash && !goget { + // See comment above where originalUrlPath is declared. + suffix := "/" + if r.URL.RawQuery != "" { + // preserve query parameters + suffix = suffix + "?" + r.URL.RawQuery + } + + redirectURL := originalUrlPath + suffix + logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) + http.Redirect(w, r, redirectURL, http.StatusFound) + return + } + + f, ok := idx.(files.File) + if !ok { + internalWebError(w, files.ErrNotReader) + return + } + + logger.Debugw("serving index.html file", "path", idxPath) + // write to request + i.serveFile(w, r, idxPath, resolvedPath.Cid(), f) + return + case resolver.ErrNoLink: + logger.Debugw("no index.html; noop", "path", idxPath) + default: + internalWebError(w, err) + return + } + + // See statusResponseWriter.WriteHeader + // and https://github.com/ipfs/go-ipfs/issues/7164 + // Note: this needs to occur before listingTemplate.Execute otherwise we get + // superfluous response.WriteHeader call from prometheus/client_golang + if w.Header().Get("Location") != "" { + logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently) + w.WriteHeader(http.StatusMovedPermanently) + return + } + + // A HTML directory index will be presented, be sure to set the correct + 
// type instead of relying on autodetection (which may fail). + w.Header().Set("Content-Type", "text/html") + + // Generated dir index requires custom Etag (it may change between go-ipfs versions) + if assets.BindataVersionHash != "" { + dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` + w.Header().Set("Etag", dirEtag) + if r.Header.Get("If-None-Match") == dirEtag { + w.WriteHeader(http.StatusNotModified) + return + } + } + + if r.Method == http.MethodHead { + logger.Debug("return as request's HTTP method is HEAD") + return + } + + // storage for directory listing + var dirListing []directoryItem + dirit := dir.Entries() + for dirit.Next() { + size := "?" + if s, err := dirit.Node().Size(); err == nil { + // Size may not be defined/supported. Continue anyways. + size = humanize.Bytes(uint64(s)) + } + + resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())) + if err != nil { + internalWebError(w, err) + return + } + hash := resolved.Cid().String() + + // See comment above where originalUrlPath is declared. + di := directoryItem{ + Size: size, + Name: dirit.Name(), + Path: gopath.Join(originalUrlPath, dirit.Name()), + Hash: hash, + ShortHash: shortHash(hash), + } + dirListing = append(dirListing, di) + } + if dirit.Err() != nil { + internalWebError(w, dirit.Err()) + return + } + + // construct the correct back link + // https://github.com/ipfs/go-ipfs/issues/1365 + var backLink string = originalUrlPath + + // don't go further up than /ipfs/$hash/ + pathSplit := path.SplitList(contentPath.String()) + switch { + // keep backlink + case len(pathSplit) == 3: // url: /ipfs/$hash + + // keep backlink + case len(pathSplit) == 4 && pathSplit[3] == "": // url: /ipfs/$hash/ + + // add the correct link depending on whether the path ends with a slash + default: + if strings.HasSuffix(backLink, "/") { + backLink += "./.." + } else { + backLink += "/.." + } + } + + size := "?" 
+ if s, err := dir.Size(); err == nil { + // Size may not be defined/supported. Continue anyways. + size = humanize.Bytes(uint64(s)) + } + + hash := resolvedPath.Cid().String() + + // Gateway root URL to be used when linking to other rootIDs. + // This will be blank unless subdomain or DNSLink resolution is being used + // for this request. + var gwURL string + + // Get gateway hostname and build gateway URL. + if h, ok := r.Context().Value("gw-hostname").(string); ok { + gwURL = "//" + h + } else { + gwURL = "" + } + + dnslink := hasDNSLinkOrigin(gwURL, contentPath.String()) + + // See comment above where originalUrlPath is declared. + tplData := listingTemplateData{ + GatewayURL: gwURL, + DNSLink: dnslink, + Listing: dirListing, + Size: size, + Path: contentPath.String(), + Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), + BackLink: backLink, + Hash: hash, + } + + logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash) + + if err := listingTemplate.Execute(w, tplData); err != nil { + internalWebError(w, err) + return + } +} diff --git a/gateway/core/corehttp/gateway_handler_unixfs_file.go b/gateway/core/corehttp/gateway_handler_unixfs_file.go new file mode 100644 index 000000000..19e6d6795 --- /dev/null +++ b/gateway/core/corehttp/gateway_handler_unixfs_file.go @@ -0,0 +1,83 @@ +package corehttp + +import ( + "fmt" + "io" + "mime" + "net/http" + gopath "path" + "strings" + + "github.com/gabriel-vasile/mimetype" + cid "github.com/ipfs/go-cid" + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveFile returns data behind a file along with HTTP headers based on +// the file itself, its CID and the contentPath used for accessing it. 
+func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { + + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, fileCid) + + // Set Content-Disposition + name := addContentDispositionHeader(w, r, contentPath) + + // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) + size, err := file.Size() + if err != nil { + http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) + return + } + + // Lazy seeker enables efficient range-requests and HTTP HEAD responses + content := &lazySeeker{ + size: size, + reader: file, + } + + // Calculate deterministic value for Content-Type HTTP header + // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) + var ctype string + if _, isSymlink := file.(*files.Symlink); isSymlink { + // We should be smarter about resolving symlinks but this is the + // "most correct" we can be without doing that. + ctype = "inode/symlink" + } else { + ctype = mime.TypeByExtension(gopath.Ext(name)) + if ctype == "" { + // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. + // Fixes https://github.com/ipfs/go-ipfs/issues/7252 + mimeType, err := mimetype.DetectReader(content) + if err != nil { + http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) + return + } + + ctype = mimeType.String() + _, err = content.Seek(0, io.SeekStart) + if err != nil { + http.Error(w, "seeker can't seek", http.StatusInternalServerError) + return + } + } + // Strip the encoding from the HTML Content-Type header and let the + // browser figure it out. 
+ // + // Fixes https://github.com/ipfs/go-ipfs/issues/2203 + if strings.HasPrefix(ctype, "text/html;") { + ctype = "text/html" + } + } + // Setting explicit Content-Type to avoid mime-type sniffing on the client + // (unifies behavior across gateways and web browsers) + w.Header().Set("Content-Type", ctype) + + // special fixup around redirects + w = &statusResponseWriter{w} + + // Done: http.ServeContent will take care of + // If-None-Match+Etag, Content-Length and range requests + http.ServeContent(w, r, name, modtime, content) +} diff --git a/gateway/core/corehttp/gateway_test.go b/gateway/core/corehttp/gateway_test.go index ae0104217..2cba931dd 100644 --- a/gateway/core/corehttp/gateway_test.go +++ b/gateway/core/corehttp/gateway_test.go @@ -126,12 +126,6 @@ func newTestServerAndNode(t *testing.T, ns mockNamesys) (*httptest.Server, iface t.Fatal(err) } - cfg, err := n.Repo.Config() - if err != nil { - t.Fatal(err) - } - cfg.Gateway.PathPrefixes = []string{"/good-prefix"} - // need this variable here since we need to construct handler with // listener, and server with handler. yay cycles. 
dh := &delegatedHandler{} @@ -242,7 +236,7 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/" + k.Cid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusNotFound, "ipfs resolve -r /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, - {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusNotFound, "ipfs resolve -r /ipns/%0D%0A%0D%0Ahello: " + namesys.ErrResolveFailed.Error() + "\n"}, + {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusNotFound, "ipfs resolve -r /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/example.com", http.StatusOK, "fnord"}, {"example.com", "/", http.StatusOK, "fnord"}, @@ -403,7 +397,6 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") res, err = doWithoutRedirect(req) if err != nil { @@ -417,8 +410,8 @@ func TestIPNSHostnameRedirect(t *testing.T) { hdr = res.Header["Location"] if len(hdr) < 1 { t.Errorf("location header not present") - } else if hdr[0] != "/good-prefix/foo/" { - t.Errorf("location header is %v, expected /good-prefix/foo/", hdr[0]) + } else if hdr[0] != "/foo/" { + t.Errorf("location header is %v, expected /foo/", hdr[0]) } // make sure /version isn't exposed @@ -427,7 +420,6 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") res, err = doWithoutRedirect(req) if err != nil { @@ -583,82 +575,6 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !strings.Contains(s, k3.Cid().String()) { t.Fatalf("expected hash in directory listing") } - - // make request to directory listing with prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - 
req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") - - res, err = doWithoutRedirect(req) - if err != nil { - t.Fatal(err) - } - - // expect correct backlinks with prefix - body, err = ioutil.ReadAll(res.Body) - if err != nil { - t.Fatalf("error reading response: %s", err) - } - s = string(body) - t.Logf("body: %s\n", string(body)) - - if !matchPathOrBreadcrumbs(s, "/ipns/example.net") { - t.Fatalf("expected a path in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected backlink in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected file in directory listing") - } - if !strings.Contains(s, k.Cid().String()) { - t.Fatalf("expected hash in directory listing") - } - - // make request to directory listing with illegal prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/bad-prefix") - - // make request to directory listing with evil prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "//good-prefix/foo") - - res, err = doWithoutRedirect(req) - if err != nil { - t.Fatal(err) - } - - // expect correct backlinks without illegal prefix - body, err = ioutil.ReadAll(res.Body) - if err != nil { - t.Fatalf("error reading response: %s", err) - } - s = string(body) - t.Logf("body: %s\n", string(body)) - - if !matchPathOrBreadcrumbs(s, "/") { - t.Fatalf("expected a path in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected backlink in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected file in directory listing") - } - if !strings.Contains(s, k.Cid().String()) { - t.Fatalf("expected hash in directory listing") - } } func TestCacheControlImmutable(t *testing.T) { From 7927c02c507fe1412a09a05f9215dbe898fa6560 Mon Sep 17 00:00:00 2001 From: Adin 
Schmahmann Date: Mon, 21 Mar 2022 10:57:08 -0400 Subject: [PATCH 08/20] feat: add gateway histogram metrics (#8443) * feat(gw): response type histogram metrics - response-type agnostic firstContentBlockGetMetric which counts the latency til the first content block. - car/block/file/gen-dir-index duration histogram metrics that show how long each response type takes * docs: improve metrics descriptions * feat: more gw histogram buckets 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60 secs as suggested in reviews at https://github.com/ipfs/go-ipfs/pull/8443 Co-authored-by: Marcin Rataj Co-authored-by: Gus Eggert This commit was moved from ipfs/kubo@beaa8fc29b472214283b9aab884ed92f03908d13 --- gateway/core/corehttp/gateway_handler.go | 107 +++++++++++++++--- .../core/corehttp/gateway_handler_block.go | 6 +- gateway/core/corehttp/gateway_handler_car.go | 6 +- .../core/corehttp/gateway_handler_unixfs.go | 7 +- .../corehttp/gateway_handler_unixfs_dir.go | 8 +- .../corehttp/gateway_handler_unixfs_file.go | 6 +- 6 files changed, 116 insertions(+), 24 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 45356271d..eca2efff6 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -62,7 +62,15 @@ type gatewayHandler struct { config GatewayConfig api coreiface.CoreAPI - unixfsGetMetric *prometheus.SummaryVec + // generic metrics + firstContentBlockGetMetric *prometheus.HistogramVec + unixfsGetMetric *prometheus.SummaryVec // deprecated, use firstContentBlockGetMetric + + // response type metrics + unixfsFileGetMetric *prometheus.HistogramVec + unixfsGenDirGetMetric *prometheus.HistogramVec + carStreamGetMetric *prometheus.HistogramVec + rawBlockGetMetric *prometheus.HistogramVec } // StatusResponseWriter enables us to override HTTP Status Code passed to @@ -85,29 +93,93 @@ func (sw *statusResponseWriter) WriteHeader(code int) { sw.ResponseWriter.WriteHeader(code) } -func 
newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler { - unixfsGetMetric := prometheus.NewSummaryVec( - // TODO: deprecate and switch to content type agnostic metrics: https://github.com/ipfs/go-ipfs/issues/8441 +func newGatewaySummaryMetric(name string, help string) *prometheus.SummaryVec { + summaryMetric := prometheus.NewSummaryVec( prometheus.SummaryOpts{ Namespace: "ipfs", Subsystem: "http", - Name: "unixfs_get_latency_seconds", - Help: "The time till the first block is received when 'getting' a file from the gateway.", + Name: name, + Help: help, + }, + []string{"gateway"}, + ) + if err := prometheus.Register(summaryMetric); err != nil { + if are, ok := err.(prometheus.AlreadyRegisteredError); ok { + summaryMetric = are.ExistingCollector.(*prometheus.SummaryVec) + } else { + log.Errorf("failed to register ipfs_http_%s: %v", name, err) + } + } + return summaryMetric +} + +func newGatewayHistogramMetric(name string, help string) *prometheus.HistogramVec { + // We can add buckets as a parameter in the future, but for now using static defaults + // suggested in https://github.com/ipfs/go-ipfs/issues/8441 + defaultBuckets := []float64{0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, 30, 60} + histogramMetric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: "ipfs", + Subsystem: "http", + Name: name, + Help: help, + Buckets: defaultBuckets, }, []string{"gateway"}, ) - if err := prometheus.Register(unixfsGetMetric); err != nil { + if err := prometheus.Register(histogramMetric); err != nil { if are, ok := err.(prometheus.AlreadyRegisteredError); ok { - unixfsGetMetric = are.ExistingCollector.(*prometheus.SummaryVec) + histogramMetric = are.ExistingCollector.(*prometheus.HistogramVec) } else { - log.Errorf("failed to register unixfsGetMetric: %v", err) + log.Errorf("failed to register ipfs_http_%s: %v", name, err) } } + return histogramMetric +} +func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler { i := 
&gatewayHandler{ - config: c, - api: api, - unixfsGetMetric: unixfsGetMetric, + config: c, + api: api, + // Improved Metrics + // ---------------------------- + // Time till the first content block (bar in /ipfs/cid/foo/bar) + // (format-agnostic, across all response types) + firstContentBlockGetMetric: newGatewayHistogramMetric( + "gw_first_content_block_get_latency_seconds", + "The time till the first content block is received on GET from the gateway.", + ), + + // Response-type specific metrics + // ---------------------------- + // UnixFS: time it takes to return a file + unixfsFileGetMetric: newGatewayHistogramMetric( + "gw_unixfs_file_get_duration_seconds", + "The time to serve an entire UnixFS file from the gateway.", + ), + // UnixFS: time it takes to generate static HTML with directory listing + unixfsGenDirGetMetric: newGatewayHistogramMetric( + "gw_unixfs_gen_dir_listing_get_duration_seconds", + "The time to serve a generated UnixFS HTML directory listing from the gateway.", + ), + // CAR: time it takes to return requested CAR stream + carStreamGetMetric: newGatewayHistogramMetric( + "gw_car_stream_get_duration_seconds", + "The time to GET an entire CAR stream from the gateway.", + ), + // Block: time it takes to return requested Block + rawBlockGetMetric: newGatewayHistogramMetric( + "gw_raw_block_get_duration_seconds", + "The time to GET an entire raw Block from the gateway.", + ), + + // Legacy Metrics + // ---------------------------- + unixfsGetMetric: newGatewaySummaryMetric( // TODO: remove? 
+ // (deprecated, use firstContentBlockGetMetric instead) + "unixfs_get_latency_seconds", + "The time to receive the first UnixFS node on a GET from the gateway.", + ), } return i } @@ -291,7 +363,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) return } - i.unixfsGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + ns := contentPath.Namespace() + timeToGetFirstContentBlock := time.Since(begin).Seconds() + i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead + i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. @@ -308,15 +383,15 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch responseFormat { case "": // The implicit response format is UnixFS logger.Debugw("serving unixfs", "path", contentPath) - i.serveUnixFs(w, r, resolvedPath, contentPath, logger) + i.serveUnixFs(w, r, resolvedPath, contentPath, begin, logger) return case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", contentPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath, begin) return case "application/vnd.ipld.car", "application/vnd.ipld.car; version=1": logger.Debugw("serving car stream", "path", contentPath) - i.serveCar(w, r, resolvedPath.Cid(), contentPath) + i.serveCar(w, r, resolvedPath.Cid(), contentPath, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) diff --git a/gateway/core/corehttp/gateway_handler_block.go b/gateway/core/corehttp/gateway_handler_block.go index 3b93851d2..13d7ebefd 100644 --- a/gateway/core/corehttp/gateway_handler_block.go 
+++ b/gateway/core/corehttp/gateway_handler_block.go @@ -4,13 +4,14 @@ import ( "bytes" "io/ioutil" "net/http" + "time" cid "github.com/ipfs/go-cid" ipath "github.com/ipfs/interface-go-ipfs-core/path" ) // serveRawBlock returns bytes behind a raw block -func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path, begin time.Time) { blockReader, err := i.api.Block().Get(r.Context(), contentPath) if err != nil { webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) @@ -35,4 +36,7 @@ func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, b // Done: http.ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests http.ServeContent(w, r, name, modtime, content) + + // Update metrics + i.rawBlockGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } diff --git a/gateway/core/corehttp/gateway_handler_car.go b/gateway/core/corehttp/gateway_handler_car.go index 43ce99eef..5f0f2117f 100644 --- a/gateway/core/corehttp/gateway_handler_car.go +++ b/gateway/core/corehttp/gateway_handler_car.go @@ -3,6 +3,7 @@ package corehttp import ( "context" "net/http" + "time" blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" @@ -13,7 +14,7 @@ import ( ) // serveCar returns a CAR stream for specific DAG+selector -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path, begin time.Time) { ctx, cancel := context.WithCancel(r.Context()) defer cancel() @@ -59,6 +60,9 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi w.Header().Set("X-Stream-Error", err.Error()) return } + 
+ // Update metrics + i.carStreamGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } type dagStore struct { diff --git a/gateway/core/corehttp/gateway_handler_unixfs.go b/gateway/core/corehttp/gateway_handler_unixfs.go index 6f476b2af..ed15f4139 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs.go +++ b/gateway/core/corehttp/gateway_handler_unixfs.go @@ -4,13 +4,14 @@ import ( "fmt" "html" "net/http" + "time" files "github.com/ipfs/go-ipfs-files" ipath "github.com/ipfs/interface-go-ipfs-core/path" "go.uber.org/zap" ) -func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, logger *zap.SugaredLogger) { +func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { // Handling UnixFS dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) if err != nil { @@ -22,7 +23,7 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res // Handling Unixfs file if f, ok := dr.(files.File); ok { logger.Debugw("serving unixfs file", "path", contentPath) - i.serveFile(w, r, contentPath, resolvedPath.Cid(), f) + i.serveFile(w, r, contentPath, resolvedPath.Cid(), f, begin) return } @@ -33,5 +34,5 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res return } logger.Debugw("serving unixfs directory", "path", contentPath) - i.serveDirectory(w, r, resolvedPath, contentPath, dir, logger) + i.serveDirectory(w, r, resolvedPath, contentPath, dir, begin, logger) } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index 8e7e131dd..87708159e 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -5,6 +5,7 @@ import ( "net/url" gopath "path" "strings" + "time" 
"github.com/dustin/go-humanize" files "github.com/ipfs/go-ipfs-files" @@ -18,7 +19,7 @@ import ( // serveDirectory returns the best representation of UnixFS directory // // It will return index.html if present, or generate directory listing otherwise. -func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, logger *zap.SugaredLogger) { +func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, begin time.Time, logger *zap.SugaredLogger) { // HostnameOption might have constructed an IPNS/IPFS path using the Host header. // In this case, we need the original path for constructing redirects @@ -62,7 +63,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, logger.Debugw("serving index.html file", "path", idxPath) // write to request - i.serveFile(w, r, idxPath, resolvedPath.Cid(), f) + i.serveFile(w, r, idxPath, resolvedPath.Cid(), f, begin) return case resolver.ErrNoLink: logger.Debugw("no index.html; noop", "path", idxPath) @@ -194,4 +195,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, internalWebError(w, err) return } + + // Update metrics + i.unixfsGenDirGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_file.go b/gateway/core/corehttp/gateway_handler_unixfs_file.go index 19e6d6795..9807969fe 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_file.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_file.go @@ -7,6 +7,7 @@ import ( "net/http" gopath "path" "strings" + "time" "github.com/gabriel-vasile/mimetype" cid "github.com/ipfs/go-cid" @@ -16,7 +17,7 @@ import ( // serveFile returns data behind a file along with HTTP headers based on // the file itself, its CID and the contentPath used for accessing 
it. -func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { +func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File, begin time.Time) { // Set Cache-Control and read optional Last-Modified time modtime := addCacheControlHeaders(w, r, contentPath, fileCid) @@ -80,4 +81,7 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, conte // Done: http.ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests http.ServeContent(w, r, name, modtime, content) + + // Update metrics + i.unixfsFileGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } From 97a588865af4418d82224d3806ae48510e4ef6be Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 1 Apr 2022 18:12:46 +0200 Subject: [PATCH 09/20] fix(gw): validate requested CAR version (#8835) * fix(gw): validate requested CAR version This adds validation of 'application/vnd.ipld.car;version=n' passed in the Accept header by HTTP clients to align Gateway behavior with the spec submitted to IANA. 
* test: fix comment in test/sharness/t0118-gateway-car.sh Co-authored-by: Gus Eggert Co-authored-by: Gus Eggert This commit was moved from ipfs/kubo@5fa556945e2a9733f39e1bfcc242cba4c31c070b --- gateway/core/corehttp/gateway_handler.go | 30 +++++++++++++------- gateway/core/corehttp/gateway_handler_car.go | 12 +++++++- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index eca2efff6..6d90dd008 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -5,6 +5,7 @@ import ( "fmt" "html/template" "io" + "mime" "net/http" "net/url" "os" @@ -348,7 +349,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } // Detect when explicit Accept header or ?format parameter are present - responseFormat := customResponseFormat(r) + responseFormat, formatParams, err := customResponseFormat(r) + if err != nil { + webError(w, "error while processing the Accept header", err, http.StatusBadRequest) + return + } // Finish early if client already has matching Etag if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { @@ -389,9 +394,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger.Debugw("serving raw block", "path", contentPath) i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath, begin) return - case "application/vnd.ipld.car", "application/vnd.ipld.car; version=1": + case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) - i.serveCar(w, r, resolvedPath.Cid(), contentPath, begin) + carVersion := formatParams["version"] + i.serveCar(w, r, resolvedPath.Cid(), contentPath, carVersion, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) @@ -761,8 +767,8 @@ func getFilename(contentPath ipath.Path) string { func getEtag(r *http.Request, cid cid.Cid) 
string { prefix := `"` suffix := `"` - responseFormat := customResponseFormat(r) - if responseFormat != "" { + responseFormat, _, err := customResponseFormat(r) + if err == nil && responseFormat != "" { // application/vnd.ipld.foo → foo f := responseFormat[strings.LastIndex(responseFormat, ".")+1:] // Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses) @@ -773,14 +779,14 @@ func getEtag(r *http.Request, cid cid.Cid) string { } // return explicit response format if specified in request as query parameter or via Accept HTTP header -func customResponseFormat(r *http.Request) string { +func customResponseFormat(r *http.Request) (mediaType string, params map[string]string, err error) { if formatParam := r.URL.Query().Get("format"); formatParam != "" { // translate query param to a content type switch formatParam { case "raw": - return "application/vnd.ipld.raw" + return "application/vnd.ipld.raw", nil, nil case "car": - return "application/vnd.ipld.car" + return "application/vnd.ipld.car", nil, nil } } // Browsers and other user agents will send Accept header with generic types like: @@ -789,10 +795,14 @@ func customResponseFormat(r *http.Request) string { for _, accept := range r.Header.Values("Accept") { // respond to the very first ipld content type if strings.HasPrefix(accept, "application/vnd.ipld") { - return accept + mediatype, params, err := mime.ParseMediaType(accept) + if err != nil { + return "", nil, err + } + return mediatype, params, nil } } - return "" + return "", nil, nil } func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { diff --git a/gateway/core/corehttp/gateway_handler_car.go b/gateway/core/corehttp/gateway_handler_car.go index 5f0f2117f..c6587e564 100644 --- a/gateway/core/corehttp/gateway_handler_car.go +++ b/gateway/core/corehttp/gateway_handler_car.go @@ -2,6 +2,7 @@ package corehttp import ( "context" + "fmt" 
"net/http" "time" @@ -14,10 +15,19 @@ import ( ) // serveCar returns a CAR stream for specific DAG+selector -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path, begin time.Time) { +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path, carVersion string, begin time.Time) { ctx, cancel := context.WithCancel(r.Context()) defer cancel() + switch carVersion { + case "": // noop, client does not care about version + case "1": // noop, we support this + default: + err := fmt.Errorf("only version=1 is supported") + webError(w, "unsupported CAR version", err, http.StatusBadRequest) + return + } + // Set Content-Disposition name := rootCid.String() + ".car" setContentDispositionHeader(w, name, "attachment") From 57bfeaee9e7e958e6eb5f1644156e5d9f44a376d Mon Sep 17 00:00:00 2001 From: Gus Eggert Date: Mon, 4 Apr 2022 13:24:05 -0400 Subject: [PATCH 10/20] feat: add basic gateway tracing (#8595) * add deprecation warning when tracer plugins are loaded * add response format attribute to span in gateway handler * add note about tracing's experimental status in godoc * add nil check for TTL when adding name span attrs * add basic sharness test for integration with otel collector * add nil check in UnixFSAPI.processLink * test: sharness check all json objs for swarm span * add env var docs to docs/environment-variables.md * chore: pin the otel collector version * add tracing spans per response type (#8841) * docs: tracing with jaeger-ui Co-authored-by: Marcin Rataj This commit was moved from ipfs/kubo@f855bfe6ef8fe8a2633df889ce766cddc8d0effb --- gateway/core/corehttp/gateway.go | 5 ++++- gateway/core/corehttp/gateway_handler.go | 8 ++++++-- gateway/core/corehttp/gateway_handler_block.go | 11 ++++++++--- gateway/core/corehttp/gateway_handler_car.go | 10 ++++++++-- gateway/core/corehttp/gateway_handler_unixfs.go | 9 +++++++-- 
gateway/core/corehttp/gateway_handler_unixfs_dir.go | 11 ++++++++--- gateway/core/corehttp/gateway_handler_unixfs_file.go | 10 +++++++--- 7 files changed, 48 insertions(+), 16 deletions(-) diff --git a/gateway/core/corehttp/gateway.go b/gateway/core/corehttp/gateway.go index fb1524da5..2e794b53f 100644 --- a/gateway/core/corehttp/gateway.go +++ b/gateway/core/corehttp/gateway.go @@ -9,6 +9,7 @@ import ( version "github.com/ipfs/go-ipfs" core "github.com/ipfs/go-ipfs/core" coreapi "github.com/ipfs/go-ipfs/core/coreapi" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" options "github.com/ipfs/interface-go-ipfs-core/options" id "github.com/libp2p/go-libp2p/p2p/protocol/identify" @@ -87,12 +88,14 @@ func GatewayOption(writable bool, paths ...string) ServeOption { "X-Stream-Output", }, headers[ACEHeadersName]...)) - gateway := newGatewayHandler(GatewayConfig{ + var gateway http.Handler = newGatewayHandler(GatewayConfig{ Headers: headers, Writable: writable, PathPrefixes: cfg.Gateway.PathPrefixes, }, api) + gateway = otelhttp.NewHandler(gateway, "Gateway.Request") + for _, p := range paths { mux.Handle(p+"/", gateway) } diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 6d90dd008..32d2eebae 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -26,6 +26,8 @@ import ( ipath "github.com/ipfs/interface-go-ipfs-core/path" routing "github.com/libp2p/go-libp2p-core/routing" prometheus "github.com/prometheus/client_golang/prometheus" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) const ( @@ -354,6 +356,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request webError(w, "error while processing the Accept header", err, http.StatusBadRequest) return } + trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) + 
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) // Finish early if client already has matching Etag if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { @@ -392,12 +396,12 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", contentPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath, begin) + i.serveRawBlock(w, r, resolvedPath, contentPath, begin) return case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) carVersion := formatParams["version"] - i.serveCar(w, r, resolvedPath.Cid(), contentPath, carVersion, begin) + i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) diff --git a/gateway/core/corehttp/gateway_handler_block.go b/gateway/core/corehttp/gateway_handler_block.go index 13d7ebefd..891c418c8 100644 --- a/gateway/core/corehttp/gateway_handler_block.go +++ b/gateway/core/corehttp/gateway_handler_block.go @@ -6,13 +6,18 @@ import ( "net/http" "time" - cid "github.com/ipfs/go-cid" + "github.com/ipfs/go-ipfs/tracing" ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) // serveRawBlock returns bytes behind a raw block -func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path, begin time.Time) { - blockReader, err := i.api.Block().Get(r.Context(), contentPath) +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time) { + ctx, span := tracing.Span(r.Context(), "Gateway", "ServeRawBlock", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() + 
blockCid := resolvedPath.Cid() + blockReader, err := i.api.Block().Get(ctx, resolvedPath) if err != nil { webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) return diff --git a/gateway/core/corehttp/gateway_handler_car.go b/gateway/core/corehttp/gateway_handler_car.go index c6587e564..d7dca46b3 100644 --- a/gateway/core/corehttp/gateway_handler_car.go +++ b/gateway/core/corehttp/gateway_handler_car.go @@ -8,15 +8,20 @@ import ( blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" + "github.com/ipfs/go-ipfs/tracing" coreiface "github.com/ipfs/interface-go-ipfs-core" ipath "github.com/ipfs/interface-go-ipfs-core/path" gocar "github.com/ipld/go-car" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) // serveCar returns a CAR stream for specific DAG+selector -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path, carVersion string, begin time.Time) { - ctx, cancel := context.WithCancel(r.Context()) +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, carVersion string, begin time.Time) { + ctx, span := tracing.Span(r.Context(), "Gateway", "ServeCar", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() + ctx, cancel := context.WithCancel(ctx) defer cancel() switch carVersion { @@ -27,6 +32,7 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi webError(w, "unsupported CAR version", err, http.StatusBadRequest) return } + rootCid := resolvedPath.Cid() // Set Content-Disposition name := rootCid.String() + ".car" diff --git a/gateway/core/corehttp/gateway_handler_unixfs.go b/gateway/core/corehttp/gateway_handler_unixfs.go index ed15f4139..2252b3891 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs.go +++ 
b/gateway/core/corehttp/gateway_handler_unixfs.go @@ -7,13 +7,18 @@ import ( "time" files "github.com/ipfs/go-ipfs-files" + "github.com/ipfs/go-ipfs/tracing" ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(r.Context(), "Gateway", "ServeUnixFs", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() // Handling UnixFS - dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) + dr, err := i.api.Unixfs().Get(ctx, resolvedPath) if err != nil { webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusNotFound) return @@ -23,7 +28,7 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res // Handling Unixfs file if f, ok := dr.(files.File); ok { logger.Debugw("serving unixfs file", "path", contentPath) - i.serveFile(w, r, contentPath, resolvedPath.Cid(), f, begin) + i.serveFile(w, r, resolvedPath, contentPath, f, begin) return } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index 87708159e..e458e8030 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -10,9 +10,12 @@ import ( "github.com/dustin/go-humanize" files "github.com/ipfs/go-ipfs-files" "github.com/ipfs/go-ipfs/assets" + "github.com/ipfs/go-ipfs/tracing" path "github.com/ipfs/go-path" "github.com/ipfs/go-path/resolver" ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) @@ -20,6 +23,8 @@ import ( // // It will return index.html if present, or generate directory listing otherwise. 
func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(r.Context(), "Gateway", "ServeDirectory", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() // HostnameOption might have constructed an IPNS/IPFS path using the Host header. // In this case, we need the original path for constructing redirects @@ -35,7 +40,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, // Check if directory has index.html, if so, serveFile idxPath := ipath.Join(resolvedPath, "index.html") - idx, err := i.api.Unixfs().Get(r.Context(), idxPath) + idx, err := i.api.Unixfs().Get(ctx, idxPath) switch err.(type) { case nil: cpath := contentPath.String() @@ -63,7 +68,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, logger.Debugw("serving index.html file", "path", idxPath) // write to request - i.serveFile(w, r, idxPath, resolvedPath.Cid(), f, begin) + i.serveFile(w, r, resolvedPath, idxPath, f, begin) return case resolver.ErrNoLink: logger.Debugw("no index.html; noop", "path", idxPath) @@ -111,7 +116,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, size = humanize.Bytes(uint64(s)) } - resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())) + resolved, err := i.api.ResolvePath(ctx, ipath.Join(resolvedPath, dirit.Name())) if err != nil { internalWebError(w, err) return diff --git a/gateway/core/corehttp/gateway_handler_unixfs_file.go b/gateway/core/corehttp/gateway_handler_unixfs_file.go index 9807969fe..e8a3718fc 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_file.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_file.go @@ -10,17 +10,21 @@ import ( "time" "github.com/gabriel-vasile/mimetype" - cid "github.com/ipfs/go-cid" files 
"github.com/ipfs/go-ipfs-files" + "github.com/ipfs/go-ipfs/tracing" ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) // serveFile returns data behind a file along with HTTP headers based on // the file itself, its CID and the contentPath used for accessing it. -func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File, begin time.Time) { +func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, file files.File, begin time.Time) { + _, span := tracing.Span(r.Context(), "Gateway", "ServeFile", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) + defer span.End() // Set Cache-Control and read optional Last-Modified time - modtime := addCacheControlHeaders(w, r, contentPath, fileCid) + modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid()) // Set Content-Disposition name := addContentDispositionHeader(w, r, contentPath) From c2c9c1e5c73e7586e0c65e04ade37f798eb2b75f Mon Sep 17 00:00:00 2001 From: Ian Davis Date: Fri, 8 Apr 2022 21:07:44 +0100 Subject: [PATCH 11/20] fix(gw): update metrics only when payload data sent (#8827) * fix: report gateway http metrics only when response is successful * fix(gw): 304 Not Modified as no-op This fix ensures we don't do any additional work when Etag match what user already has in their own cache. 
Co-authored-by: Marcin Rataj This commit was moved from ipfs/kubo@fbf76663f4db6f3c4ed89d8c017d9319d2727121 --- gateway/core/corehttp/gateway_handler.go | 60 +++++++++++++++++-- .../core/corehttp/gateway_handler_block.go | 10 ++-- .../corehttp/gateway_handler_unixfs_dir.go | 7 ++- .../corehttp/gateway_handler_unixfs_file.go | 11 ++-- 4 files changed, 74 insertions(+), 14 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 32d2eebae..b14b88739 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -36,8 +36,10 @@ const ( immutableCacheControl = "public, max-age=29030400, immutable" ) -var onlyAscii = regexp.MustCompile("[[:^ascii:]]") -var noModtime = time.Unix(0, 0) // disables Last-Modified header if passed as modtime +var ( + onlyAscii = regexp.MustCompile("[[:^ascii:]]") + noModtime = time.Unix(0, 0) // disables Last-Modified header if passed as modtime +) // HTML-based redirect for errors which can be recovered from, but we want // to provide hint to people that they should fix things on their end. @@ -96,6 +98,54 @@ func (sw *statusResponseWriter) WriteHeader(code int) { sw.ResponseWriter.WriteHeader(code) } +// ServeContent replies to the request using the content in the provided ReadSeeker +// and returns the status code written and any error encountered during a write. +// It wraps http.ServeContent which takes care of If-None-Match+Etag, +// Content-Length and range requests. +func ServeContent(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, content io.ReadSeeker) (int, bool, error) { + ew := &errRecordingResponseWriter{ResponseWriter: w} + http.ServeContent(ew, req, name, modtime, content) + + // When we calculate some metrics we want a flag that lets us to ignore + // errors and 304 Not Modified, and only care when requested data + // was sent in full. 
+ dataSent := ew.code/100 == 2 && ew.err == nil + + return ew.code, dataSent, ew.err +} + +// errRecordingResponseWriter wraps a ResponseWriter to record the status code and any write error. +type errRecordingResponseWriter struct { + http.ResponseWriter + code int + err error +} + +func (w *errRecordingResponseWriter) WriteHeader(code int) { + if w.code == 0 { + w.code = code + } + w.ResponseWriter.WriteHeader(code) +} + +func (w *errRecordingResponseWriter) Write(p []byte) (int, error) { + n, err := w.ResponseWriter.Write(p) + if err != nil && w.err == nil { + w.err = err + } + return n, err +} + +// ReadFrom exposes errRecordingResponseWriter's underlying ResponseWriter to io.Copy +// to allow optimized methods to be taken advantage of. +func (w *errRecordingResponseWriter) ReadFrom(r io.Reader) (n int64, err error) { + n, err = io.Copy(w.ResponseWriter, r) + if err != nil && w.err == nil { + w.err = err + } + return n, err +} + func newGatewaySummaryMetric(name string, help string) *prometheus.SummaryVec { summaryMetric := prometheus.NewSummaryVec( prometheus.SummaryOpts{ @@ -360,7 +410,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) // Finish early if client already has matching Etag - if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { + ifNoneMatch := r.Header.Get("If-None-Match") + if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == getDirListingEtag(resolvedPath.Cid()) { w.WriteHeader(http.StatusNotModified) return } @@ -401,7 +452,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) carVersion := formatParams["version"] - i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) + i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) return 
default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) @@ -644,7 +695,6 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 // TODO: set Last-Modified based on /ipns/ publishing timestamp? - } else { // immutable! CACHE ALL THE THINGS, FOREVER! wolololol w.Header().Set("Cache-Control", immutableCacheControl) diff --git a/gateway/core/corehttp/gateway_handler_block.go b/gateway/core/corehttp/gateway_handler_block.go index 891c418c8..afd553d30 100644 --- a/gateway/core/corehttp/gateway_handler_block.go +++ b/gateway/core/corehttp/gateway_handler_block.go @@ -38,10 +38,12 @@ func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, r w.Header().Set("Content-Type", "application/vnd.ipld.raw") w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - // Done: http.ServeContent will take care of + // ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests - http.ServeContent(w, r, name, modtime, content) + _, dataSent, _ := ServeContent(w, r, name, modtime, content) - // Update metrics - i.rawBlockGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + if dataSent { + // Update metrics + i.rawBlockGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + } } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index e458e8030..158277135 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -8,6 +8,7 @@ import ( "time" "github.com/dustin/go-humanize" + cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" "github.com/ipfs/go-ipfs/assets" 
"github.com/ipfs/go-ipfs/tracing" @@ -93,7 +94,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, // Generated dir index requires custom Etag (it may change between go-ipfs versions) if assets.BindataVersionHash != "" { - dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` + dirEtag := getDirListingEtag(resolvedPath.Cid()) w.Header().Set("Etag", dirEtag) if r.Header.Get("If-None-Match") == dirEtag { w.WriteHeader(http.StatusNotModified) @@ -204,3 +205,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, // Update metrics i.unixfsGenDirGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) } + +func getDirListingEtag(dirCid cid.Cid) string { + return `"DirIndex-` + assets.BindataVersionHash + `_CID-` + dirCid.String() + `"` +} diff --git a/gateway/core/corehttp/gateway_handler_unixfs_file.go b/gateway/core/corehttp/gateway_handler_unixfs_file.go index e8a3718fc..2938c8f48 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_file.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_file.go @@ -82,10 +82,13 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, resol // special fixup around redirects w = &statusResponseWriter{w} - // Done: http.ServeContent will take care of + // ServeContent will take care of // If-None-Match+Etag, Content-Length and range requests - http.ServeContent(w, r, name, modtime, content) + _, dataSent, _ := ServeContent(w, r, name, modtime, content) - // Update metrics - i.unixfsFileGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + // Was response successful? 
+ if dataSent { + // Update metrics + i.unixfsFileGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) + } } From 3be8c5a7229c741bfed81674755eeec0f49c8d79 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Fri, 8 Apr 2022 17:09:23 -0400 Subject: [PATCH 12/20] fix(gw): missing return if dir fails to finalize (#8806) This commit was moved from ipfs/kubo@52bf1339460220b80d6afdd21eb710ef7d8eaf18 --- gateway/core/corehttp/gateway_handler.go | 1 + 1 file changed, 1 insertion(+) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index b14b88739..d2446450b 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -662,6 +662,7 @@ func (i *gatewayHandler) deleteHandler(w http.ResponseWriter, r *http.Request) { nnode, err := root.GetDirectory().GetNode() if err != nil { webError(w, "WritableGateway: failed to finalize", err, http.StatusInternalServerError) + return } ncid := nnode.Cid() From 03c018793a450d8f4e02ede5be59ece53bbd5a21 Mon Sep 17 00:00:00 2001 From: Gus Eggert Date: Mon, 11 Apr 2022 17:09:00 -0400 Subject: [PATCH 13/20] fix: fix context plumbing in gateway handlers (#8871) This ensures that child contexts are passed around between the handlers so that traces show the call hierarchy correctly. 
This commit was moved from ipfs/kubo@9bd346e25004df4fd7927ab151097f975fb433c2 --- gateway/core/corehttp/gateway_handler.go | 6 +++--- gateway/core/corehttp/gateway_handler_block.go | 5 +++-- gateway/core/corehttp/gateway_handler_car.go | 6 +++--- gateway/core/corehttp/gateway_handler_unixfs.go | 11 ++++++----- gateway/core/corehttp/gateway_handler_unixfs_dir.go | 7 ++++--- gateway/core/corehttp/gateway_handler_unixfs_file.go | 5 +++-- 6 files changed, 22 insertions(+), 18 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index d2446450b..0c34cc9b1 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -443,16 +443,16 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch responseFormat { case "": // The implicit response format is UnixFS logger.Debugw("serving unixfs", "path", contentPath) - i.serveUnixFs(w, r, resolvedPath, contentPath, begin, logger) + i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger) return case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", contentPath) - i.serveRawBlock(w, r, resolvedPath, contentPath, begin) + i.serveRawBlock(r.Context(), w, r, resolvedPath, contentPath, begin) return case "application/vnd.ipld.car": logger.Debugw("serving car stream", "path", contentPath) carVersion := formatParams["version"] - i.serveCar(w, r, resolvedPath, contentPath, carVersion, begin) + i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) diff --git a/gateway/core/corehttp/gateway_handler_block.go b/gateway/core/corehttp/gateway_handler_block.go index afd553d30..8d6ce0f36 100644 --- a/gateway/core/corehttp/gateway_handler_block.go +++ b/gateway/core/corehttp/gateway_handler_block.go @@ -2,6 +2,7 @@ package corehttp import 
( "bytes" + "context" "io/ioutil" "net/http" "time" @@ -13,8 +14,8 @@ import ( ) // serveRawBlock returns bytes behind a raw block -func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time) { - ctx, span := tracing.Span(r.Context(), "Gateway", "ServeRawBlock", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) +func (i *gatewayHandler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeRawBlock", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() blockCid := resolvedPath.Cid() blockReader, err := i.api.Block().Get(ctx, resolvedPath) diff --git a/gateway/core/corehttp/gateway_handler_car.go b/gateway/core/corehttp/gateway_handler_car.go index d7dca46b3..195808870 100644 --- a/gateway/core/corehttp/gateway_handler_car.go +++ b/gateway/core/corehttp/gateway_handler_car.go @@ -17,9 +17,9 @@ import ( "go.opentelemetry.io/otel/trace" ) -// serveCar returns a CAR stream for specific DAG+selector -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, carVersion string, begin time.Time) { - ctx, span := tracing.Span(r.Context(), "Gateway", "ServeCar", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) +// serveCAR returns a CAR stream for specific DAG+selector +func (i *gatewayHandler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, carVersion string, begin time.Time) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeCAR", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() ctx, cancel := context.WithCancel(ctx) defer cancel() diff --git 
a/gateway/core/corehttp/gateway_handler_unixfs.go b/gateway/core/corehttp/gateway_handler_unixfs.go index 2252b3891..f91e2df3b 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs.go +++ b/gateway/core/corehttp/gateway_handler_unixfs.go @@ -1,6 +1,7 @@ package corehttp import ( + "context" "fmt" "html" "net/http" @@ -14,8 +15,8 @@ import ( "go.uber.org/zap" ) -func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { - ctx, span := tracing.Span(r.Context(), "Gateway", "ServeUnixFs", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) +func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() // Handling UnixFS dr, err := i.api.Unixfs().Get(ctx, resolvedPath) @@ -28,16 +29,16 @@ func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, res // Handling Unixfs file if f, ok := dr.(files.File); ok { logger.Debugw("serving unixfs file", "path", contentPath) - i.serveFile(w, r, resolvedPath, contentPath, f, begin) + i.serveFile(ctx, w, r, resolvedPath, contentPath, f, begin) return } // Handling Unixfs directory dir, ok := dr.(files.Directory) if !ok { - internalWebError(w, fmt.Errorf("unsupported UnixFs type")) + internalWebError(w, fmt.Errorf("unsupported UnixFS type")) return } logger.Debugw("serving unixfs directory", "path", contentPath) - i.serveDirectory(w, r, resolvedPath, contentPath, dir, begin, logger) + i.serveDirectory(ctx, w, r, resolvedPath, contentPath, dir, begin, logger) } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index 
158277135..7d491ea49 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -1,6 +1,7 @@ package corehttp import ( + "context" "net/http" "net/url" gopath "path" @@ -23,8 +24,8 @@ import ( // serveDirectory returns the best representation of UnixFS directory // // It will return index.html if present, or generate directory listing otherwise. -func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, begin time.Time, logger *zap.SugaredLogger) { - ctx, span := tracing.Span(r.Context(), "Gateway", "ServeDirectory", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) +func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, begin time.Time, logger *zap.SugaredLogger) { + ctx, span := tracing.Span(ctx, "Gateway", "ServeDirectory", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() // HostnameOption might have constructed an IPNS/IPFS path using the Host header. 
@@ -69,7 +70,7 @@ func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, logger.Debugw("serving index.html file", "path", idxPath) // write to request - i.serveFile(w, r, resolvedPath, idxPath, f, begin) + i.serveFile(ctx, w, r, resolvedPath, idxPath, f, begin) return case resolver.ErrNoLink: logger.Debugw("no index.html; noop", "path", idxPath) diff --git a/gateway/core/corehttp/gateway_handler_unixfs_file.go b/gateway/core/corehttp/gateway_handler_unixfs_file.go index 2938c8f48..1852705fd 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_file.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_file.go @@ -1,6 +1,7 @@ package corehttp import ( + "context" "fmt" "io" "mime" @@ -19,8 +20,8 @@ import ( // serveFile returns data behind a file along with HTTP headers based on // the file itself, its CID and the contentPath used for accessing it. -func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, file files.File, begin time.Time) { - _, span := tracing.Span(r.Context(), "Gateway", "ServeFile", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) +func (i *gatewayHandler) serveFile(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, file files.File, begin time.Time) { + _, span := tracing.Span(ctx, "Gateway", "ServeFile", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() // Set Cache-Control and read optional Last-Modified time From e01550b957dbacff3305bea1a059ac9e7e1ec7ff Mon Sep 17 00:00:00 2001 From: Franky W Date: Thu, 31 Mar 2022 12:25:49 +0200 Subject: [PATCH 14/20] Remove gobindata Since go1.16, there are built-in tools that allow for embedding a filesystem inside the binary.
We now make use of the `embed` package to have all files put into the binary, removing the need to generate the files and removing dependencies Co-authored-by: Jorropo This commit was moved from ipfs/kubo@9210c08fa69c454e14fc183b5d4237ddecbf3550 --- gateway/core/corehttp/gateway_handler_unixfs_dir.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index 7d491ea49..f462e52f8 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -94,7 +94,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit w.Header().Set("Content-Type", "text/html") // Generated dir index requires custom Etag (it may change between go-ipfs versions) - if assets.BindataVersionHash != "" { + if assets.AssetHash != "" { dirEtag := getDirListingEtag(resolvedPath.Cid()) w.Header().Set("Etag", dirEtag) if r.Header.Get("If-None-Match") == dirEtag { @@ -208,5 +208,5 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit } func getDirListingEtag(dirCid cid.Cid) string { - return `"DirIndex-` + assets.BindataVersionHash + `_CID-` + dirCid.String() + `"` + return `"DirIndex-` + assets.AssetHash + `_CID-` + dirCid.String() + `"` } From 2703c9a1a6ba367fb6c93616337b5eba26b621e3 Mon Sep 17 00:00:00 2001 From: Franky W Date: Mon, 11 Apr 2022 15:31:46 +0200 Subject: [PATCH 15/20] Change `assets.Asset` from a `func` to the embed.FS This removes the delegation to the function and requires all callers that used the `assets.Asset` func to access the asset via the `embed.FS` This commit was moved from ipfs/kubo@70398d275cad281bb55362632dd5a1ce7cb6e80e --- gateway/core/corehttp/gateway_indexPage.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gateway/core/corehttp/gateway_indexPage.go b/gateway/core/corehttp/gateway_indexPage.go index
3bee4822b..fbea91649 100644 --- a/gateway/core/corehttp/gateway_indexPage.go +++ b/gateway/core/corehttp/gateway_indexPage.go @@ -94,7 +94,7 @@ func hasDNSLinkOrigin(gwURL string, path string) bool { var listingTemplate *template.Template func init() { - knownIconsBytes, err := assets.Asset("dir-index-html/knownIcons.txt") + knownIconsBytes, err := assets.Asset.ReadFile("dir-index-html/knownIcons.txt") if err != nil { panic(err) } @@ -121,7 +121,7 @@ func init() { } // Directory listing template - dirIndexBytes, err := assets.Asset("dir-index-html/dir-index.html") + dirIndexBytes, err := assets.Asset.ReadFile("dir-index-html/dir-index.html") if err != nil { panic(err) } From 177ce46592403836ed0dce416faf54e49b09f3ac Mon Sep 17 00:00:00 2001 From: Justin Johnson Date: Fri, 15 Apr 2022 09:06:43 -0500 Subject: [PATCH 16/20] chore(gw): extract logical functions to improve readability (#8885) * Extract functions from getOrHeadHandler to improve readability and prepare for later refactorings * Address PR feedback on when to return errors or booleans * Be explicit about use of *requestError vs error This commit was moved from ipfs/kubo@e07baf5835f646352df14fa21ed641053cd0f81b --- gateway/core/corehttp/gateway_handler.go | 212 +++++++++++++++-------- 1 file changed, 139 insertions(+), 73 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 0c34cc9b1..52d86257b 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -28,6 +28,7 @@ import ( prometheus "github.com/prometheus/client_golang/prometheus" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) const ( @@ -85,6 +86,25 @@ type statusResponseWriter struct { http.ResponseWriter } +// Custom type for collecting error details to be handled by `webRequestError` +type requestError struct { + Message string + StatusCode int + Err error +} + +func (r *requestError) Error() string 
{ + return r.Err.Error() +} + +func newRequestError(message string, err error, statusCode int) *requestError { + return &requestError{ + Message: message, + Err: err, + StatusCode: statusCode, + } +} + func (sw *statusResponseWriter) WriteHeader(code int) { // Check if we need to adjust Status Code to account for scheduled redirect // This enables us to return payload along with HTTP 301 @@ -324,61 +344,22 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger := log.With("from", r.RequestURI) logger.Debug("http request received") - // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) - // TODO: remove this after go-ipfs 0.13 ships - if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { - err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") - webError(w, "unsupported HTTP header", err, http.StatusBadRequest) + if err := handleUnsupportedHeaders(r); err != nil { + webRequestError(w, err) return } - // ?uri query param support for requests produced by web browsers - // via navigator.registerProtocolHandler Web API - // https://developer.mozilla.org/en-US/docs/Web/API/Navigator/registerProtocolHandler - // TLDR: redirect /ipfs/?uri=ipfs%3A%2F%2Fcid%3Fquery%3Dval to /ipfs/cid?query=val - if uriParam := r.URL.Query().Get("uri"); uriParam != "" { - u, err := url.Parse(uriParam) - if err != nil { - webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) - return - } - if u.Scheme != "ipfs" && u.Scheme != "ipns" { - webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) - return - } - path := u.Path - if u.RawQuery != "" { // preserve query if present - path = path + "?" 
+ u.RawQuery - } - - redirectURL := gopath.Join("/", u.Scheme, u.Host, path) - logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) - http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + if requestHandled := handleProtocolHandlerRedirect(w, r, logger); requestHandled { return } - // Service Worker registration request - if r.Header.Get("Service-Worker") == "script" { - // Disallow Service Worker registration on namespace roots - // https://github.com/ipfs/go-ipfs/issues/4025 - matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) - if matched { - err := fmt.Errorf("registration is not allowed for this scope") - webError(w, "navigator.serviceWorker", err, http.StatusBadRequest) - return - } + if err := handleServiceWorkerRegistration(r); err != nil { + webRequestError(w, err) + return } contentPath := ipath.New(r.URL.Path) - if pathErr := contentPath.IsValid(); pathErr != nil { - if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { - // the error was due to redundant namespace, which we were able to fix - // by returning error/redirect page, nothing left to do here - logger.Debugw("redundant namespace; noop") - return - } - // unable to fix path, returning error - webError(w, "invalid ipfs path", pathErr, http.StatusBadRequest) + if requestHandled := handleSuperfluousNamespace(w, r, contentPath); requestHandled { return } @@ -416,26 +397,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // Update the global metric of the time it takes to read the final root block of the requested resource - // NOTE: for legacy reasons this happens before we go into content-type specific code paths - _, err = i.api.Block().Get(r.Context(), resolvedPath) - if err != nil { - webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + if err := i.handleGettingFirstBlock(r, begin, contentPath, resolvedPath); err != nil { + 
webRequestError(w, err) return } - ns := contentPath.Namespace() - timeToGetFirstContentBlock := time.Since(begin).Seconds() - i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead - i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) - // HTTP Headers - i.addUserHeaders(w) // ok, _now_ write user's headers. - w.Header().Set("X-Ipfs-Path", contentPath.String()) - - if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { - w.Header().Set("X-Ipfs-Roots", rootCids) - } else { // this should never happen, as we resolved the contentPath already - webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + if err := i.setCommonHeaders(w, r, contentPath); err != nil { + webRequestError(w, err) return } @@ -785,6 +753,10 @@ func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Reques return rootCidList, nil } +func webRequestError(w http.ResponseWriter, err *requestError) { + webError(w, err.Message, err.Err, err.StatusCode) +} + func webError(w http.ResponseWriter, message string, err error, defaultCode int) { if _, ok := err.(resolver.ErrNoLink); ok { webErrorWithCode(w, message, err, http.StatusNotFound) @@ -911,32 +883,126 @@ func debugStr(path string) string { return q } +func handleUnsupportedHeaders(r *http.Request) (err *requestError) { + // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) + // TODO: remove this after go-ipfs 0.13 ships + if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { + err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") + return newRequestError("unsupported HTTP header", err, http.StatusBadRequest) + } + return nil +} + +// ?uri query param support for requests produced by web browsers +// via navigator.registerProtocolHandler Web API +// 
https://developer.mozilla.org/en-US/docs/Web/API/Navigator/registerProtocolHandler +// TLDR: redirect /ipfs/?uri=ipfs%3A%2F%2Fcid%3Fquery%3Dval to /ipfs/cid?query=val +func handleProtocolHandlerRedirect(w http.ResponseWriter, r *http.Request, logger *zap.SugaredLogger) (requestHandled bool) { + if uriParam := r.URL.Query().Get("uri"); uriParam != "" { + u, err := url.Parse(uriParam) + if err != nil { + webError(w, "failed to parse uri query parameter", err, http.StatusBadRequest) + return true + } + if u.Scheme != "ipfs" && u.Scheme != "ipns" { + webError(w, "uri query parameter scheme must be ipfs or ipns", err, http.StatusBadRequest) + return true + } + path := u.Path + if u.RawQuery != "" { // preserve query if present + path = path + "?" + u.RawQuery + } + + redirectURL := gopath.Join("/", u.Scheme, u.Host, path) + logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + return true + } + + return false +} + +// Disallow Service Worker registration on namespace roots +// https://github.com/ipfs/go-ipfs/issues/4025 +func handleServiceWorkerRegistration(r *http.Request) (err *requestError) { + if r.Header.Get("Service-Worker") == "script" { + matched, _ := regexp.MatchString(`^/ip[fn]s/[^/]+$`, r.URL.Path) + if matched { + err := fmt.Errorf("registration is not allowed for this scope") + return newRequestError("navigator.serviceWorker", err, http.StatusBadRequest) + } + } + + return nil +} + // Attempt to fix redundant /ipfs/ namespace as long as resulting // 'intended' path is valid. 
This is in case gremlins were tickled // wrong way and user ended up at /ipfs/ipfs/{cid} or /ipfs/ipns/{id} // like in bafybeien3m7mdn6imm425vc2s22erzyhbvk5n3ofzgikkhmdkh5cuqbpbq :^)) -func fixupSuperfluousNamespace(w http.ResponseWriter, urlPath string, urlQuery string) bool { - if !(strings.HasPrefix(urlPath, "/ipfs/ipfs/") || strings.HasPrefix(urlPath, "/ipfs/ipns/")) { - return false // not a superfluous namespace +func handleSuperfluousNamespace(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) (requestHandled bool) { + // If the path is valid, there's nothing to do + if pathErr := contentPath.IsValid(); pathErr == nil { + return false + } + + // If there's no superfluous namespace, there's nothing to do + if !(strings.HasPrefix(r.URL.Path, "/ipfs/ipfs/") || strings.HasPrefix(r.URL.Path, "/ipfs/ipns/")) { + return false } - intendedPath := ipath.New(strings.TrimPrefix(urlPath, "/ipfs")) + + // Attempt to fix the superfluous namespace + intendedPath := ipath.New(strings.TrimPrefix(r.URL.Path, "/ipfs")) if err := intendedPath.IsValid(); err != nil { - return false // not a valid path + webError(w, "invalid ipfs path", err, http.StatusBadRequest) + return true } intendedURL := intendedPath.String() - if urlQuery != "" { + if r.URL.RawQuery != "" { // we render HTML, so ensure query entries are properly escaped - q, _ := url.ParseQuery(urlQuery) + q, _ := url.ParseQuery(r.URL.RawQuery) intendedURL = intendedURL + "?" 
+ q.Encode() } // return HTTP 400 (Bad Request) with HTML error page that: // - points at correct canonical path via header // - displays human-readable error // - redirects to intendedURL after a short delay + w.WriteHeader(http.StatusBadRequest) - return redirectTemplate.Execute(w, redirectTemplateData{ + if err := redirectTemplate.Execute(w, redirectTemplateData{ RedirectURL: intendedURL, SuggestedPath: intendedPath.String(), - ErrorMsg: fmt.Sprintf("invalid path: %q should be %q", urlPath, intendedPath.String()), - }) == nil + ErrorMsg: fmt.Sprintf("invalid path: %q should be %q", r.URL.Path, intendedPath.String()), + }); err != nil { + webError(w, "failed to redirect when fixing superfluous namespace", err, http.StatusBadRequest) + } + + return true +} + +func (i *gatewayHandler) handleGettingFirstBlock(r *http.Request, begin time.Time, contentPath ipath.Path, resolvedPath ipath.Resolved) *requestError { + // Update the global metric of the time it takes to read the final root block of the requested resource + // NOTE: for legacy reasons this happens before we go into content-type specific code paths + _, err := i.api.Block().Get(r.Context(), resolvedPath) + if err != nil { + return newRequestError("ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + } + ns := contentPath.Namespace() + timeToGetFirstContentBlock := time.Since(begin).Seconds() + i.unixfsGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) // deprecated, use firstContentBlockGetMetric instead + i.firstContentBlockGetMetric.WithLabelValues(ns).Observe(timeToGetFirstContentBlock) + return nil +} + +func (i *gatewayHandler) setCommonHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) *requestError { + i.addUserHeaders(w) // ok, _now_ write user's headers. 
+ w.Header().Set("X-Ipfs-Path", contentPath.String()) + + if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { + w.Header().Set("X-Ipfs-Roots", rootCids) + } else { // this should never happen, as we resolved the contentPath already + return newRequestError("error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + } + + return nil } From 88cc0da692792b0b3575f7c7d20fbecea7e2b100 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 19 Apr 2022 20:57:11 +0200 Subject: [PATCH 17/20] feat(gw): improved If-None-Match support (#8891) Improves the way we handle If-None-Match header: - Support for more than one Etag passed in If-None-Match - Match both strong and weak Etags to maximize caching across various HTTP clients and libraries (some send weak Etags by default) - Support for wildcard '*' - Tests for If-None-Match behavior This commit was moved from ipfs/kubo@67fdb6efcdd1d4c0c0ee6fef7190b92c13184dbd --- gateway/core/corehttp/gateway_handler.go | 83 +++++++++++++++++-- .../corehttp/gateway_handler_unixfs_dir.go | 12 +-- gateway/core/corehttp/gateway_test.go | 25 ++++++ 3 files changed, 106 insertions(+), 14 deletions(-) diff --git a/gateway/core/corehttp/gateway_handler.go b/gateway/core/corehttp/gateway_handler.go index 52d86257b..f32fac54e 100644 --- a/gateway/core/corehttp/gateway_handler.go +++ b/gateway/core/corehttp/gateway_handler.go @@ -7,6 +7,7 @@ import ( "io" "mime" "net/http" + "net/textproto" "net/url" "os" gopath "path" @@ -390,11 +391,18 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) - // Finish early if client already has matching Etag - ifNoneMatch := r.Header.Get("If-None-Match") - if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == 
getDirListingEtag(resolvedPath.Cid()) { - w.WriteHeader(http.StatusNotModified) - return + // Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified + if inm := r.Header.Get("If-None-Match"); inm != "" { + pathCid := resolvedPath.Cid() + // need to check against both File and Dir Etag variants + // because this inexpensive check happens before we do any I/O + cidEtag := getEtag(r, pathCid) + dirEtag := getDirListingEtag(pathCid) + if etagMatch(inm, cidEtag, dirEtag) { + // Finish early if client already has a matching Etag + w.WriteHeader(http.StatusNotModified) + return + } } if err := i.handleGettingFirstBlock(r, begin, contentPath, resolvedPath); err != nil { @@ -790,6 +798,71 @@ func getFilename(contentPath ipath.Path) string { return gopath.Base(s) } +// etagMatch evaluates if we can respond with HTTP 304 Not Modified +// It supports multiple weak and strong etags passed in If-None-Match string +// including the wildcard one. +func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool { + buf := ifNoneMatchHeader + for { + buf = textproto.TrimString(buf) + if len(buf) == 0 { + break + } + if buf[0] == ',' { + buf = buf[1:] + continue + } + // If-None-Match: * should match against any etag + if buf[0] == '*' { + return true + } + etag, remain := scanETag(buf) + if etag == "" { + break + } + // Check for match both strong and weak etags + if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) { + return true + } + buf = remain + } + return false +} + +// scanETag determines if a syntactically valid ETag is present at s. If so, +// the ETag and remaining text after consuming ETag is returned. Otherwise, +// it returns "", "". 
+// (This is the same logic as one executed inside of http.ServeContent) +func scanETag(s string) (etag string, remain string) { + s = textproto.TrimString(s) + start := 0 + if strings.HasPrefix(s, "W/") { + start = 2 + } + if len(s[start:]) < 2 || s[start] != '"' { + return "", "" + } + // ETag is either W/"text" or "text". + // See RFC 7232 2.3. + for i := start + 1; i < len(s); i++ { + c := s[i] + switch { + // Character values allowed in ETags. + case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80: + case c == '"': + return s[:i+1], s[i+1:] + default: + return "", "" + } + } + return "", "" +} + +// etagWeakMatch reports whether a and b match using weak ETag comparison. +func etagWeakMatch(a, b string) bool { + return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/") +} + // generate Etag value based on HTTP request and CID func getEtag(r *http.Request, cid cid.Cid) string { prefix := `"` diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index f462e52f8..d2cce5868 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -93,15 +93,9 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // type instead of relying on autodetection (which may fail). 
w.Header().Set("Content-Type", "text/html") - // Generated dir index requires custom Etag (it may change between go-ipfs versions) - if assets.AssetHash != "" { - dirEtag := getDirListingEtag(resolvedPath.Cid()) - w.Header().Set("Etag", dirEtag) - if r.Header.Get("If-None-Match") == dirEtag { - w.WriteHeader(http.StatusNotModified) - return - } - } + // Generated dir index requires custom Etag (output may change between go-ipfs versions) + dirEtag := getDirListingEtag(resolvedPath.Cid()) + w.Header().Set("Etag", dirEtag) if r.Method == http.MethodHead { logger.Debug("return as request's HTTP method is HEAD") diff --git a/gateway/core/corehttp/gateway_test.go b/gateway/core/corehttp/gateway_test.go index 2cba931dd..303e4a1ac 100644 --- a/gateway/core/corehttp/gateway_test.go +++ b/gateway/core/corehttp/gateway_test.go @@ -656,3 +656,28 @@ func TestVersion(t *testing.T) { t.Fatalf("response doesn't contain protocol version:\n%s", s) } } + +func TestEtagMatch(t *testing.T) { + for _, test := range []struct { + header string // value in If-None-Match HTTP header + cidEtag string + dirEtag string + expected bool // expected result of etagMatch(header, cidEtag, dirEtag) + }{ + {"", `"etag"`, "", false}, // no If-None-Match + {"", "", `"etag"`, false}, // no If-None-Match + {`"etag"`, `"etag"`, "", true}, // file etag match + {`W/"etag"`, `"etag"`, "", true}, // file etag match + {`"foo", W/"bar", W/"etag"`, `"etag"`, "", true}, // file etag match (array) + {`"foo",W/"bar",W/"etag"`, `"etag"`, "", true}, // file etag match (compact array) + {`"etag"`, "", `W/"etag"`, true}, // dir etag match + {`"etag"`, "", `W/"etag"`, true}, // dir etag match + {`W/"etag"`, "", `W/"etag"`, true}, // dir etag match + {`*`, `"etag"`, "", true}, // wildcard etag match + } { + result := etagMatch(test.header, test.cidEtag, test.dirEtag) + if result != test.expected { + t.Fatalf("unexpected result of etagMatch(%q, %q, %q), got %t, expected %t", test.header, test.cidEtag, test.dirEtag, 
result, test.expected) + } + } +} From e433566d424c66809021e045023f186c801cdc3e Mon Sep 17 00:00:00 2001 From: Lucas Molas Date: Thu, 28 Apr 2022 14:36:57 -0300 Subject: [PATCH 18/20] feat(gateway): Gateway.FastDirIndexThreshold (#8853) * fix(core/gateway): option to limit directory size listing * feat(gw): HTMLDirListingLimit This is alternative take on the way we limit the HTML listing output. Instead of a hard cut-off, we list up to HTMLDirListingLimit. When a directory has more items than HTMLDirListingLimit we show additional header and footer informing user that only $HTMLDirListingLimit items are listed. This is a better UX. * fix: 0 disables Gateway.HTMLDirListingLimit * refactor: Gateway.FastDirIndexThreshold see explainer in docs/config.md * refactor: prealoc slices * docs: Gateway.FastDirIndexThreshold * refactor: core/corehttp/gateway_handler.go https://github.com/ipfs/go-ipfs/pull/8853#discussion_r851437088 * docs: apply suggestions from code review Co-authored-by: Alan Shaw Co-authored-by: Marcin Rataj Co-authored-by: Alan Shaw This commit was moved from ipfs/kubo@25cc85fa9359f907f348e0c2139f2b535313c56c --- gateway/core/corehttp/gateway.go | 14 ++-- .../core/corehttp/gateway_handler_unixfs.go | 2 + .../corehttp/gateway_handler_unixfs_dir.go | 66 +++++++++++-------- gateway/core/corehttp/gateway_indexPage.go | 17 ++--- 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/gateway/core/corehttp/gateway.go b/gateway/core/corehttp/gateway.go index 2e794b53f..a4ae53831 100644 --- a/gateway/core/corehttp/gateway.go +++ b/gateway/core/corehttp/gateway.go @@ -16,9 +16,10 @@ import ( ) type GatewayConfig struct { - Headers map[string][]string - Writable bool - PathPrefixes []string + Headers map[string][]string + Writable bool + PathPrefixes []string + FastDirIndexThreshold int } // A helper function to clean up a set of headers: @@ -89,9 +90,10 @@ func GatewayOption(writable bool, paths ...string) ServeOption { }, headers[ACEHeadersName]...)) var 
gateway http.Handler = newGatewayHandler(GatewayConfig{ - Headers: headers, - Writable: writable, - PathPrefixes: cfg.Gateway.PathPrefixes, + Headers: headers, + Writable: writable, + PathPrefixes: cfg.Gateway.PathPrefixes, + FastDirIndexThreshold: int(cfg.Gateway.FastDirIndexThreshold.WithDefault(100)), }, api) gateway = otelhttp.NewHandler(gateway, "Gateway.Request") diff --git a/gateway/core/corehttp/gateway_handler_unixfs.go b/gateway/core/corehttp/gateway_handler_unixfs.go index f91e2df3b..b318a641a 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs.go +++ b/gateway/core/corehttp/gateway_handler_unixfs.go @@ -18,6 +18,7 @@ import ( func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) { ctx, span := tracing.Span(ctx, "Gateway", "ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) defer span.End() + // Handling UnixFS dr, err := i.api.Unixfs().Get(ctx, resolvedPath) if err != nil { @@ -39,6 +40,7 @@ func (i *gatewayHandler) serveUnixFS(ctx context.Context, w http.ResponseWriter, internalWebError(w, fmt.Errorf("unsupported UnixFS type")) return } + logger.Debugw("serving unixfs directory", "path", contentPath) i.serveDirectory(ctx, w, r, resolvedPath, contentPath, dir, begin, logger) } diff --git a/gateway/core/corehttp/gateway_handler_unixfs_dir.go b/gateway/core/corehttp/gateway_handler_unixfs_dir.go index d2cce5868..1e059200a 100644 --- a/gateway/core/corehttp/gateway_handler_unixfs_dir.go +++ b/gateway/core/corehttp/gateway_handler_unixfs_dir.go @@ -15,6 +15,7 @@ import ( "github.com/ipfs/go-ipfs/tracing" path "github.com/ipfs/go-path" "github.com/ipfs/go-path/resolver" + options "github.com/ipfs/interface-go-ipfs-core/options" ipath "github.com/ipfs/interface-go-ipfs-core/path" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" @@ -102,36 +103,46 @@ func (i 
*gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit return } + // Optimization 1: + // List children without fetching their root blocks (fast, but no size info) + results, err := i.api.Unixfs().Ls(ctx, resolvedPath, options.Unixfs.ResolveChildren(false)) + if err != nil { + internalWebError(w, err) + return + } + // storage for directory listing - var dirListing []directoryItem - dirit := dir.Entries() - for dirit.Next() { - size := "?" - if s, err := dirit.Node().Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } + dirListing := make([]directoryItem, 0, len(results)) - resolved, err := i.api.ResolvePath(ctx, ipath.Join(resolvedPath, dirit.Name())) - if err != nil { + for link := range results { + if link.Err != nil { internalWebError(w, err) return } - hash := resolved.Cid().String() - - // See comment above where originalUrlPath is declared. + hash := link.Cid.String() di := directoryItem{ - Size: size, - Name: dirit.Name(), - Path: gopath.Join(originalUrlPath, dirit.Name()), + Size: "", // no size because we did not fetch child nodes + Name: link.Name, + Path: gopath.Join(originalUrlPath, link.Name), Hash: hash, ShortHash: shortHash(hash), } dirListing = append(dirListing, di) } - if dirit.Err() != nil { - internalWebError(w, dirit.Err()) - return + + // Optimization 2: fetch sizes only for dirs below FastDirIndexThreshold + if len(dirListing) < i.config.FastDirIndexThreshold { + dirit := dir.Entries() + linkNo := 0 + for dirit.Next() { + size := "?" + if s, err := dirit.Node().Size(); err == nil { + // Size may not be defined/supported. Continue anyways. + size = humanize.Bytes(uint64(s)) + } + dirListing[linkNo].Size = size + linkNo++ + } } // construct the correct back link @@ -180,14 +191,15 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // See comment above where originalUrlPath is declared. 
tplData := listingTemplateData{ - GatewayURL: gwURL, - DNSLink: dnslink, - Listing: dirListing, - Size: size, - Path: contentPath.String(), - Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), - BackLink: backLink, - Hash: hash, + GatewayURL: gwURL, + DNSLink: dnslink, + Listing: dirListing, + Size: size, + Path: contentPath.String(), + Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), + BackLink: backLink, + Hash: hash, + FastDirIndexThreshold: i.config.FastDirIndexThreshold, } logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash) diff --git a/gateway/core/corehttp/gateway_indexPage.go b/gateway/core/corehttp/gateway_indexPage.go index fbea91649..6cc548cdc 100644 --- a/gateway/core/corehttp/gateway_indexPage.go +++ b/gateway/core/corehttp/gateway_indexPage.go @@ -12,14 +12,15 @@ import ( // structs for directory listing type listingTemplateData struct { - GatewayURL string - DNSLink bool - Listing []directoryItem - Size string - Path string - Breadcrumbs []breadcrumb - BackLink string - Hash string + GatewayURL string + DNSLink bool + Listing []directoryItem + Size string + Path string + Breadcrumbs []breadcrumb + BackLink string + Hash string + FastDirIndexThreshold int } type directoryItem struct { From ba03bd86efc691ddc7c5ab80ecac96697dde7634 Mon Sep 17 00:00:00 2001 From: Antonio Navarro Perez Date: Wed, 4 May 2022 17:02:07 +0200 Subject: [PATCH 19/20] chore: fix linting errors (#8930) This commit was moved from ipfs/kubo@afd11f1019f0c79a4a6f8b25cc4232239409cc58 --- gateway/core/corehttp/hostname.go | 1 + 1 file changed, 1 insertion(+) diff --git a/gateway/core/corehttp/hostname.go b/gateway/core/corehttp/hostname.go index 6c0ad5bca..93dde67ab 100644 --- a/gateway/core/corehttp/hostname.go +++ b/gateway/core/corehttp/hostname.go @@ -249,6 +249,7 @@ func withHostnameContext(r *http.Request, hostname string) *http.Request { // on subdomain and dnslink gateways. 
While DNSlink could read value from // Host header, subdomain gateways have more comples rules (knownSubdomainDetails) // More: https://github.com/ipfs/dir-index-html/issues/42 + // nolint: staticcheck // non-backward compatible change ctx := context.WithValue(r.Context(), "gw-hostname", hostname) return r.WithContext(ctx) } From 0f6329e3b091b1bfe931fccb54fe253709a86dd9 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 19 May 2022 20:11:19 +0200 Subject: [PATCH 20/20] fix: JS caching via Access-Control-Expose-Headers (#8984) This fix safelists additional headers allowing JS running on websites to read them when IPFS resource is downloaded via Fetch API. These headers provide metadata necessary for making smart caching decisions when IPFS resources are downloaded via Service Worker or a similar middleware on the edge. (cherry picked from commit e195b35ff6ce2ce4cb1fa95c13b00843e9c36304) This commit was moved from ipfs/kubo@4449909b2da690d3677903fc0b1797c1ad00275b --- gateway/core/corehttp/gateway.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gateway/core/corehttp/gateway.go b/gateway/core/corehttp/gateway.go index a4ae53831..84ad13897 100644 --- a/gateway/core/corehttp/gateway.go +++ b/gateway/core/corehttp/gateway.go @@ -84,9 +84,12 @@ func GatewayOption(writable bool, paths ...string) ServeOption { headers[ACEHeadersName] = cleanHeaderSet( append([]string{ + "Content-Length", "Content-Range", "X-Chunked-Output", "X-Stream-Output", + "X-Ipfs-Path", + "X-Ipfs-Roots", }, headers[ACEHeadersName]...)) var gateway http.Handler = newGatewayHandler(GatewayConfig{