From 421029cf61e7fd99974f625ad18c598f0f3de871 Mon Sep 17 00:00:00 2001 From: Malte Ubl Date: Mon, 12 Sep 2022 17:27:43 -0700 Subject: [PATCH] Don't execute prefetches for bot user agents (#40435) Such bots typically navigate websites using hard navigations (as they crawl one URL at a time). Consequently, they do not benefit from prefetches at all, while the prefetches increase the cost of both the crawl and operating the site. ## Bug - [ ] Related issues linked using `fixes #number` - [x] Integration tests added - [ ] Errors have helpful link attached, see `contributing.md` --- packages/next/server/base-server.ts | 3 +- packages/next/server/utils.ts | 6 ---- packages/next/shared/lib/router/router.ts | 7 ++++ .../next/shared/lib/router/utils/is-bot.ts | 5 +++ .../preload-viewport/pages/bot-user-agent.js | 22 +++++++++++++ .../preload-viewport/test/index.test.js | 32 +++++++++++++++++++ 6 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 packages/next/shared/lib/router/utils/is-bot.ts create mode 100644 test/integration/preload-viewport/pages/bot-user-agent.js diff --git a/packages/next/server/base-server.ts b/packages/next/server/base-server.ts index 411983b3ed9ff6c..4bee04739af328a 100644 --- a/packages/next/server/base-server.ts +++ b/packages/next/server/base-server.ts @@ -48,7 +48,8 @@ import Router from './router' import { setRevalidateHeaders } from './send-payload/revalidate-headers' import { execOnce } from '../shared/lib/utils' -import { isBlockedPage, isBot } from './utils' +import { isBlockedPage } from './utils' +import { isBot } from '../shared/lib/router/utils/is-bot' import RenderResult from './render-result' import { removeTrailingSlash } from '../shared/lib/router/utils/remove-trailing-slash' import { denormalizePagePath } from '../shared/lib/page-path/denormalize-page-path' diff --git a/packages/next/server/utils.ts b/packages/next/server/utils.ts index 856f5f7032629f7..9e4ca33e9abb246 100644 --- a/packages/next/server/utils.ts +++ 
b/packages/next/server/utils.ts @@ -17,12 +17,6 @@ export function cleanAmpPath(pathname: string): string { return pathname } -export function isBot(userAgent: string): boolean { - return /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i.test( - userAgent - ) -} - export function isTargetLikeServerless(target: string) { const isServerless = target === 'serverless' const isServerlessTrace = target === 'experimental-serverless-trace' diff --git a/packages/next/shared/lib/router/router.ts b/packages/next/shared/lib/router/router.ts index 1ad9a039b42a2ed..cf1180fd6766ee0 100644 --- a/packages/next/shared/lib/router/router.ts +++ b/packages/next/shared/lib/router/router.ts @@ -47,6 +47,7 @@ import { hasBasePath } from '../../../client/has-base-path' import { getNextPathnameInfo } from './utils/get-next-pathname-info' import { formatNextPathnameInfo } from './utils/format-next-pathname-info' import { compareRouterStates } from './utils/compare-states' +import { isBot } from './utils/is-bot' declare global { interface Window { @@ -2171,6 +2172,12 @@ export default class Router implements BaseRouter { asPath: string = url, options: PrefetchOptions = {} ): Promise { + if (typeof window !== 'undefined' && isBot(window.navigator.userAgent)) { + // No prefetches for bots that render the link since they are typically navigating + // links via the equivalent of a hard navigation and hence never utilize these + // prefetches. 
+ return + } let parsed = parseRelativeUrl(url) let { pathname, query } = parsed diff --git a/packages/next/shared/lib/router/utils/is-bot.ts b/packages/next/shared/lib/router/utils/is-bot.ts new file mode 100644 index 000000000000000..c512679b4381b3e --- /dev/null +++ b/packages/next/shared/lib/router/utils/is-bot.ts @@ -0,0 +1,5 @@ +export function isBot(userAgent: string): boolean { + return /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i.test( + userAgent + ) +} diff --git a/test/integration/preload-viewport/pages/bot-user-agent.js b/test/integration/preload-viewport/pages/bot-user-agent.js new file mode 100644 index 000000000000000..aa524d95585096c --- /dev/null +++ b/test/integration/preload-viewport/pages/bot-user-agent.js @@ -0,0 +1,22 @@ +import Head from 'next/head' +import Link from 'next/link' + +export default () => { + return ( +
+ + + +
+ + to /another + +
+ ) +} diff --git a/test/integration/preload-viewport/test/index.test.js b/test/integration/preload-viewport/test/index.test.js index b8aad8d5a45fb77..9f0e023ea54c6f8 100644 --- a/test/integration/preload-viewport/test/index.test.js +++ b/test/integration/preload-viewport/test/index.test.js @@ -110,6 +110,38 @@ describe('Prefetching Links in viewport', () => { } }) + it('should prefetch with non-bot UA', async () => { + let browser + try { + browser = await webdriver( + appPort, + `/bot-user-agent?useragent=${encodeURIComponent( + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' + )}` + ) + const links = await browser.elementsByCss('link[rel=prefetch]') + expect(links).toHaveLength(1) + } finally { + if (browser) await browser.close() + } + }) + + it('should not prefetch with bot UA', async () => { + let browser + try { + browser = await webdriver( + appPort, + `/bot-user-agent?useragent=${encodeURIComponent( + 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' + )}` + ) + const links = await browser.elementsByCss('link[rel=prefetch]') + expect(links).toHaveLength(0) + } finally { + if (browser) await browser.close() + } + }) + it('should prefetch rewritten href with link in viewport onload', async () => { let browser try {