diff --git a/docs/docs/how-to/local-development/debugging-missing-data.md b/docs/docs/how-to/local-development/debugging-missing-data.md new file mode 100644 index 0000000000000..ac4a03809da45 --- /dev/null +++ b/docs/docs/how-to/local-development/debugging-missing-data.md @@ -0,0 +1,101 @@ +--- +title: Debugging Missing or Stale Data Fields on Nodes +--- + +## Overview of `LMDB` datastore behavior changes + +In Gatsby 3 we introduced a new data store called `LMDB` (enabled with `LMDB_STORE` and/or `PARALLEL_QUERY_RUNNING` flags). In Gatsby 4 it became the default data store. It allows Gatsby to execute data layer related processing outside of the main build process and enables Gatsby to run queries in multiple processes as well as support additional rendering strategies ([DSG](/docs/reference/rendering-options/deferred-static-generation/) and [SSR](/docs/reference/rendering-options/server-side-rendering/)). + +In a lot of cases that change is completely invisible to users, but there are cases where things behave differently. + +Direct node mutations in various API lifecycles are not persisted anymore. In previous Gatsby versions it did work because source of truth for the data layer was directly in Node.js memory, so mutating node was in fact mutating source of truth. Now Gatsby edits data it receives from the database, so unless it explicitly upserts data to this database after edits, those edits will not be persisted (even for same the same build). + +Common errors when doing swap to `LMDB` will be that some fields don't exist anymore or are `null`/`undefined` when trying to execute GraphQL queries. + +## Diagnostic mode + +Gatsby (starting with version 4.4) can detect those node mutations. Unfortunately it adds measurable overhead so it isn't enabled by default. You can opt into it when you see data related issues (particularly when you didn't have this issue before using `LMDB`). To enable diagnostic mode: + +- Use truthy environment variable `GATSBY_DETECT_NODE_MUTATIONS`: + ``` + GATSBY_DETECT_NODE_MUTATIONS=1 gatsby build + ``` +- Or use `DETECT_NODE_MUTATIONS` config flag: + ```javascript:title=gatsby-config.js + module.exports = { + flags: { + DETECT_NODE_MUTATIONS: true, + }, + } + ``` + +Example diagnostic message you might see: + +``` +warn Node mutation detected + +File /plugins/transform/gatsby-node.js:4:20 + 2 | if (node.internal.type === `Test`) { + 3 | // console.log(`[Mutating node in onCreateNode]`) +> 4 | node.nested.a2 = `edited` + | ^ + 5 | } + 6 | } + 7 | + +Stack trace: + at Object.exports.onCreateNode (/plugins/transform/gatsby-node.js:4:20) + at runAPI (/node_modules/gatsby/src/utils/api-runner-node.js:462:22) + at Promise.catch.decorateEvent.pluginName +(/node_modules/gatsby/src/utils/api-runner-node.js:613:13) + at Promise._execute + ... Rest of Stacktrace +``` + +It will point you to the code that is trying to mutate nodes. Note that it might also point to installed plugins in which case you should notify the plugin maintainer about it. + +**Please Note:** Be sure to stop using this mode once you find and handle all problematic code paths as it will decrease performance. + +## Migration + +### Mutating a node in `onCreateNode` + +Instead of mutating nodes directly, `createNodeField` action should be used instead. This way Gatsby will update the source of truth (to actually update the node in the datastore). `createNodeField` will create that additional field under `fields.fieldName`. If you want to preserve schema shape, so that additional field is on the root of a node, you can use schema customization. + +```diff +const { createRemoteFileNode } = require(`gatsby-source-filesystem`) + +exports.onCreateNode = async ({ + node, // the node that was just created +- actions: { createNode }, ++ actions: { createNode, createNodeField }, + createNodeId, + getCache, +}) => { + if (node.internal.type === `SomeNodeType`) { + const fileNode = await createRemoteFileNode({ + // the url of the remote image to generate a node for + url: node.imgUrl, + parentNodeId: node.id, + createNode, + createNodeId, + getCache, + }) + + if (fileNode) { +- node.localFile___NODE = fileNode.id ++ createNodeField({ node, name: 'localFile', value: fileNode.id }) + } + } +} ++ ++exports.createSchemaCustomization = ({ actions }) => { ++ const { createTypes } = actions ++ ++ createTypes(` ++ type SomeNodeType implements Node { ++ localFile: File @link(from: "fields.localFile") ++ } ++ `) ++} +``` diff --git a/docs/docs/reference/release-notes/migrating-from-v3-to-v4.md b/docs/docs/reference/release-notes/migrating-from-v3-to-v4.md index b35abf959c33f..a74e69eb2380e 100644 --- a/docs/docs/reference/release-notes/migrating-from-v3-to-v4.md +++ b/docs/docs/reference/release-notes/migrating-from-v3-to-v4.md @@ -583,8 +583,7 @@ This was never an intended feature of Gatsby and is considered an anti-pattern ( Starting with v4 Gatsby introduces a persisted storage for nodes and thus this pattern will no longer work because nodes are persisted after `createNode` call and all direct mutations after that will be lost. -Unfortunately it is hard to detect it automatically (without sacrificing performance), so we recommend you to -check your code to ensure you don't mutate nodes directly. +Gatsby provides diagnostic mode to detect those direct mutations, unfortunately it has noticeable performance overhead so we don't enable it by default. See [Debugging missing data](/docs/how-to/local-development/debugging-missing-data/) for more details on it. Gatsby provides several actions available in `sourceNodes` and `onCreateNode` APIs to use instead: diff --git a/packages/gatsby/src/redux/actions/public.js b/packages/gatsby/src/redux/actions/public.js index bcf3e23ec787c..ecc76fe34208e 100644 --- a/packages/gatsby/src/redux/actions/public.js +++ b/packages/gatsby/src/redux/actions/public.js @@ -28,6 +28,7 @@ const normalizePath = require(`../../utils/normalize-path`).default import { createJobV2FromInternalJob } from "./internal" import { maybeSendJobToMainProcess } from "../../utils/jobs/worker-messaging" import { reportOnce } from "../../utils/report-once" +import { wrapNode } from "../../utils/detect-node-mutations" const isNotTestEnv = process.env.NODE_ENV !== `test` const isTestEnv = process.env.NODE_ENV === `test` @@ -868,7 +869,7 @@ actions.createNode = const { payload: node, traceId, parentSpan } = createNodeAction return apiRunnerNode(`onCreateNode`, { - node, + node: wrapNode(node), traceId, parentSpan, traceTags: { nodeId: node.id, nodeType: node.internal.type }, diff --git a/packages/gatsby/src/schema/node-model.js b/packages/gatsby/src/schema/node-model.js index 46035933ec46d..2f80b8ae2b943 100644 --- a/packages/gatsby/src/schema/node-model.js +++ b/packages/gatsby/src/schema/node-model.js @@ -22,6 +22,7 @@ import { } from "../datastore" import { GatsbyIterable, isIterable } from "../datastore/common/iterable" import { reportOnce } from "../utils/report-once" +import { wrapNode, wrapNodes } from "../utils/detect-node-mutations" type TypeOrTypeName = string | GraphQLOutputType @@ -149,7 +150,7 @@ class LocalNodeModel { this.trackInlineObjectsInRootNode(node) } - return this.trackPageDependencies(result, pageDependencies) + return wrapNode(this.trackPageDependencies(result, pageDependencies)) } /** @@ -180,7 +181,7 @@ class LocalNodeModel { result.forEach(node => this.trackInlineObjectsInRootNode(node)) } - return this.trackPageDependencies(result, pageDependencies) + return wrapNodes(this.trackPageDependencies(result, pageDependencies)) } /** @@ -221,7 +222,7 @@ class LocalNodeModel { typeof type === `string` ? type : type.name } - return this.trackPageDependencies(result, pageDependencies) + return wrapNodes(this.trackPageDependencies(result, pageDependencies)) } /** @@ -346,7 +347,10 @@ class LocalNodeModel { pageDependencies.connectionType = gqlType.name } this.trackPageDependencies(result.entries, pageDependencies) - return result + return { + entries: wrapNodes(result.entries), + totalCount: result.totalCount, + } } /** @@ -383,7 +387,7 @@ class LocalNodeModel { // the query whenever any node of this type changes. pageDependencies.connectionType = gqlType.name } - return this.trackPageDependencies(first, pageDependencies) + return wrapNode(this.trackPageDependencies(first, pageDependencies)) } prepareNodes(type, queryFields, fieldsToResolve) { diff --git a/packages/gatsby/src/services/initialize.ts b/packages/gatsby/src/services/initialize.ts index 8972da6045269..5c1234759c3bf 100644 --- a/packages/gatsby/src/services/initialize.ts +++ b/packages/gatsby/src/services/initialize.ts @@ -21,6 +21,7 @@ import { IBuildContext } from "./types" import { detectLmdbStore } from "../datastore" import { loadConfigAndPlugins } from "../bootstrap/load-config-and-plugins" import type { InternalJob } from "../utils/jobs/types" +import { enableNodeMutationsDetection } from "../utils/detect-node-mutations" interface IPluginResolution { resolve: string @@ -185,6 +186,10 @@ export async function initialize({ } const lmdbStoreIsUsed = detectLmdbStore() + if (process.env.GATSBY_DETECT_NODE_MUTATIONS) { + enableNodeMutationsDetection() + } + if (config && config.polyfill) { reporter.warn( `Support for custom Promise polyfills has been removed in Gatsby v2. We only support Babel 7's new automatic polyfilling behavior.` diff --git a/packages/gatsby/src/utils/api-runner-node.js b/packages/gatsby/src/utils/api-runner-node.js index e4f3e1779844b..9b412d2a43234 100644 --- a/packages/gatsby/src/utils/api-runner-node.js +++ b/packages/gatsby/src/utils/api-runner-node.js @@ -30,6 +30,7 @@ const { requireGatsbyPlugin } = require(`./require-gatsby-plugin`) const { getNonGatsbyCodeFrameFormatted } = require(`./stack-trace-utils`) const { trackBuildError, decorateEvent } = require(`gatsby-telemetry`) import errorParser from "./api-runner-error-parser" +import { wrapNode, wrapNodes } from "./detect-node-mutations" if (!process.env.BLUEBIRD_DEBUG && !process.env.BLUEBIRD_LONG_STACK_TRACES) { // Unless specified - disable longStackTraces @@ -40,6 +41,21 @@ if (!process.env.BLUEBIRD_DEBUG && !process.env.BLUEBIRD_LONG_STACK_TRACES) { Promise.config({ longStackTraces: false }) } +const nodeMutationsWrappers = { + getNode(id) { + return wrapNode(getNode(id)) + }, + getNodes() { + return wrapNodes(getNodes()) + }, + getNodesByType(type) { + return wrapNodes(getNodesByType(type)) + }, + getNodeAndSavePathDependency(id) { + return wrapNode(getNodeAndSavePathDependency(id)) + }, +} + // Bind action creators per plugin so we can auto-add // metadata to actions they create. const boundPluginActionCreators = {} @@ -372,6 +388,14 @@ const runAPI = async (plugin, api, args, activity) => { runningActivities.forEach(activity => activity.end()) } + const shouldDetectNodeMutations = [ + `sourceNodes`, + `onCreateNode`, + `createResolvers`, + `createSchemaCustomization`, + `setFieldsOnGraphQLNodeType`, + ].includes(api) + const apiCallArgs = [ { ...args, @@ -383,11 +407,19 @@ const runAPI = async (plugin, api, args, activity) => { store, emitter, getCache, - getNodes, - getNode, - getNodesByType, + getNodes: shouldDetectNodeMutations + ? nodeMutationsWrappers.getNodes + : getNodes, + getNode: shouldDetectNodeMutations + ? nodeMutationsWrappers.getNode + : getNode, + getNodesByType: shouldDetectNodeMutations + ? nodeMutationsWrappers.getNodesByType + : getNodesByType, reporter: extendedLocalReporter, - getNodeAndSavePathDependency, + getNodeAndSavePathDependency: shouldDetectNodeMutations + ? nodeMutationsWrappers.getNodeAndSavePathDependency + : getNodeAndSavePathDependency, cache, createNodeId: namespacedCreateNodeId, createContentDigest, diff --git a/packages/gatsby/src/utils/detect-node-mutations.ts b/packages/gatsby/src/utils/detect-node-mutations.ts new file mode 100644 index 0000000000000..619249510de46 --- /dev/null +++ b/packages/gatsby/src/utils/detect-node-mutations.ts @@ -0,0 +1,158 @@ +import reporter from "gatsby-cli/lib/reporter" +import { getNonGatsbyCodeFrameFormatted } from "./stack-trace-utils" +import type { IGatsbyNode } from "../redux/types" + +const reported = new Set() + +const genericProxy = createProxyHandler() +const nodeInternalProxy = createProxyHandler({ + onGet(key, value) { + if (key === `fieldOwners` || key === `content`) { + // all allowed in here + return value + } + return undefined + }, + onSet(target, key, value) { + if (key === `fieldOwners` || key === `content`) { + target[key] = value + return true + } + return undefined + }, +}) + +const nodeProxy = createProxyHandler({ + onGet(key, value) { + if (key === `internal`) { + return memoizedProxy(value, nodeInternalProxy) + } else if ( + key === `__gatsby_resolved` || + key === `fields` || + key === `children` + ) { + // all allowed in here + return value + } + return undefined + }, + onSet(target, key, value) { + if (key === `__gatsby_resolved` || key === `fields` || key === `children`) { + target[key] = value + return true + } + return undefined + }, +}) + +/** + * Every time we create proxy for object, we store it in WeakMap, + * so that we reuse it for that object instead of creating new Proxy. + * This also ensures reference equality: `memoizedProxy(obj) === memoizedProxy(obj)`. + * If we didn't reuse already created proxy above comparison would return false. + */ +const referenceMap = new WeakMap() +function memoizedProxy(target: T, handler: ProxyHandler): T { + const alreadyWrapped = referenceMap.get(target) + if (alreadyWrapped) { + return alreadyWrapped + } else { + const wrapped = new Proxy(target, handler) + referenceMap.set(target, wrapped) + return wrapped + } +} + +function createProxyHandler({ + onGet, + onSet, +}: { + onGet?: (key: string | symbol, value: any) => any + onSet?: (target: any, key: string | symbol, value: any) => boolean | undefined +} = {}): ProxyHandler { + function set(target, key, value): boolean { + if (onSet) { + const result = onSet(target, key, value) + if (result !== undefined) { + return result + } + } + + const error = new Error(`Stack trace:`) + Error.captureStackTrace(error, set) + + if (error.stack && !reported.has(error.stack)) { + reported.add(error.stack) + const codeFrame = getNonGatsbyCodeFrameFormatted({ + stack: error.stack, + }) + reporter.warn( + `Node mutation detected\n\n${ + codeFrame ? `${codeFrame}\n\n` : `` + }${error.stack.replace(/^Error:?\s*/, ``)}` + ) + } + return true + } + + function get(target, key): any { + const value = target[key] + + if (onGet) { + const result = onGet(key, value) + if (result !== undefined) { + return result + } + } + + const fieldDescriptor = Object.getOwnPropertyDescriptor(target, key) + if (fieldDescriptor && !fieldDescriptor.writable) { + // this is to prevent errors like: + // ``` + // TypeError: 'get' on proxy: property 'constants' is a read - only and + // non - configurable data property on the proxy target but the proxy + // did not return its actual value + // (expected '[object Object]' but got '[object Object]') + // ``` + return value + } + + if (typeof value === `object` && value !== null) { + return memoizedProxy(value, genericProxy) + } + + return value + } + + return { + get, + set, + } +} + +let shouldWrapNodesInProxies = !!process.env.GATSBY_DETECT_NODE_MUTATIONS +export function enableNodeMutationsDetection(): void { + shouldWrapNodesInProxies = true + + reporter.warn( + `Node mutation detection is enabled. Remember to disable it after you are finished with diagnostic as it will cause build performance degradation.` + ) +} + +export function wrapNode(node: T): T { + if (node && shouldWrapNodesInProxies) { + return memoizedProxy(node, nodeProxy) + } else { + return node + } +} + +export function wrapNodes | undefined>( + nodes: T +): T { + if (nodes && shouldWrapNodesInProxies && nodes.length > 0) { + return nodes.map(node => memoizedProxy(node, nodeProxy)) as T + } else { + return nodes + } +} diff --git a/packages/gatsby/src/utils/flags.ts b/packages/gatsby/src/utils/flags.ts index 408e2b44716c5..b700349955c15 100644 --- a/packages/gatsby/src/utils/flags.ts +++ b/packages/gatsby/src/utils/flags.ts @@ -226,6 +226,15 @@ const activeFlags: Array = [ }, requires: `Requires Node v14.10 or above.`, }, + { + name: `DETECT_NODE_MUTATIONS`, + env: `GATSBY_DETECT_NODE_MUTATIONS`, + command: `all`, + telemetryId: `DetectNodeMutations`, + description: `Diagnostic mode to log any attempts to mutate node directly. Helpful when debugging missing data problems. See https://gatsby.dev/debugging-missing-data for more details.`, + experimental: false, + testFitness: (): fitnessEnum => true, + }, ] export default activeFlags diff --git a/packages/gatsby/src/utils/stack-trace-utils.ts b/packages/gatsby/src/utils/stack-trace-utils.ts index 87786b8a7844e..d9123fb8b276d 100644 --- a/packages/gatsby/src/utils/stack-trace-utils.ts +++ b/packages/gatsby/src/utils/stack-trace-utils.ts @@ -50,8 +50,18 @@ interface ICodeFrame { export const getNonGatsbyCodeFrame = ({ highlightCode = true, + stack, +}: { + highlightCode?: boolean + stack?: string } = {}): null | ICodeFrame => { - const callSite = getNonGatsbyCallSite() + let callSite + if (stack) { + callSite = stackTrace.parse({ stack, name: ``, message: `` })[0] + } else { + callSite = getNonGatsbyCallSite() + } + if (!callSite) { return null } @@ -80,11 +90,16 @@ export const getNonGatsbyCodeFrame = ({ } } -export const getNonGatsbyCodeFrameFormatted = ({ highlightCode = true } = {}): - | null - | string => { +export const getNonGatsbyCodeFrameFormatted = ({ + highlightCode = true, + stack, +}: { + highlightCode?: boolean + stack?: string +} = {}): null | string => { const possibleCodeFrame = getNonGatsbyCodeFrame({ highlightCode, + stack, }) if (!possibleCodeFrame) {