From 0d76e1726423698c6cf3c7527720dae7c80e770f Mon Sep 17 00:00:00 2001 From: Malte Ubl Date: Sat, 10 Sep 2022 17:22:33 -0700 Subject: [PATCH] Don't execute prefetches for bot user agents Such bots typically navigate websites using hard navigations (as they crawl one URL at a time). Consequently, they do not benefit from prefetches at all, while the prefetches increase the cost of both the crawl and operating the site. --- packages/next/server/base-server.ts | 3 ++- packages/next/server/utils.ts | 6 ------ packages/next/shared/lib/router/router.ts | 7 +++++++ packages/next/shared/lib/router/utils/is-bot.ts | 5 +++++ .../preload-viewport/test/index.test.js | 14 ++++++++++++++ test/lib/browsers/base.ts | 6 +++++- test/lib/browsers/playwright.ts | 4 ++-- test/lib/browsers/selenium.ts | 7 ++++++- test/lib/next-webdriver.ts | 3 ++- 9 files changed, 43 insertions(+), 12 deletions(-) create mode 100644 packages/next/shared/lib/router/utils/is-bot.ts diff --git a/packages/next/server/base-server.ts b/packages/next/server/base-server.ts index 411983b3ed9ff6c..4bee04739af328a 100644 --- a/packages/next/server/base-server.ts +++ b/packages/next/server/base-server.ts @@ -48,7 +48,8 @@ import Router from './router' import { setRevalidateHeaders } from './send-payload/revalidate-headers' import { execOnce } from '../shared/lib/utils' -import { isBlockedPage, isBot } from './utils' +import { isBlockedPage } from './utils' +import { isBot } from '../shared/lib/router/utils/is-bot' import RenderResult from './render-result' import { removeTrailingSlash } from '../shared/lib/router/utils/remove-trailing-slash' import { denormalizePagePath } from '../shared/lib/page-path/denormalize-page-path' diff --git a/packages/next/server/utils.ts b/packages/next/server/utils.ts index 856f5f7032629f7..9e4ca33e9abb246 100644 --- a/packages/next/server/utils.ts +++ b/packages/next/server/utils.ts @@ -17,12 +17,6 @@ export function cleanAmpPath(pathname: string): string { return pathname } -export function 
isBot(userAgent: string): boolean { - return /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i.test( - userAgent - ) -} - export function isTargetLikeServerless(target: string) { const isServerless = target === 'serverless' const isServerlessTrace = target === 'experimental-serverless-trace' diff --git a/packages/next/shared/lib/router/router.ts b/packages/next/shared/lib/router/router.ts index 5fab0820e1b67f2..db0388c965c17e2 100644 --- a/packages/next/shared/lib/router/router.ts +++ b/packages/next/shared/lib/router/router.ts @@ -47,6 +47,7 @@ import { hasBasePath } from '../../../client/has-base-path' import { getNextPathnameInfo } from './utils/get-next-pathname-info' import { formatNextPathnameInfo } from './utils/format-next-pathname-info' import { compareRouterStates } from './utils/compare-states' +import { isBot } from './utils/is-bot' declare global { interface Window { @@ -2210,6 +2211,12 @@ export default class Router implements BaseRouter { asPath: string = url, options: PrefetchOptions = {} ): Promise { + if (typeof navigator !== 'undefined' && isBot(navigator.userAgent)) { + // No prefetches for bots that render the link since they are typically navigating + // links via the equivalent of a hard navigation and hence never utilize these + // prefetches. 
+ return + } let parsed = parseRelativeUrl(url) let { pathname, query } = parsed diff --git a/packages/next/shared/lib/router/utils/is-bot.ts b/packages/next/shared/lib/router/utils/is-bot.ts new file mode 100644 index 000000000000000..c512679b4381b3e --- /dev/null +++ b/packages/next/shared/lib/router/utils/is-bot.ts @@ -0,0 +1,5 @@ +export function isBot(userAgent: string): boolean { + return /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i.test( + userAgent + ) +} diff --git a/test/integration/preload-viewport/test/index.test.js b/test/integration/preload-viewport/test/index.test.js index b8aad8d5a45fb77..bc1d1c495a505b5 100644 --- a/test/integration/preload-viewport/test/index.test.js +++ b/test/integration/preload-viewport/test/index.test.js @@ -110,6 +110,20 @@ describe('Prefetching Links in viewport', () => { } }) + it('should not prefetch with bot UA', async () => { + let browser + try { + browser = await webdriver(appPort, '/', { + userAgent: + ' Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + }) + const links = await browser.elementsByCss('link[rel=prefetch]') + expect(links).toHaveLength(0) + } finally { + if (browser) await browser.close() + } + }) + it('should prefetch rewritten href with link in viewport onload', async () => { let browser try { diff --git a/test/lib/browsers/base.ts b/test/lib/browsers/base.ts index 748270aca302435..561d473ed327cce 100644 --- a/test/lib/browsers/base.ts +++ b/test/lib/browsers/base.ts @@ -18,7 +18,11 @@ export class BrowserInterface { return this } - async setup(browserName: string, 
locale?: string): Promise {} + async setup( + browserName: string, + locale?: string, + userAgent?: string + ): Promise {} async close(): Promise {} async quit(): Promise {} diff --git a/test/lib/browsers/playwright.ts b/test/lib/browsers/playwright.ts index cfa313108c03980..6d2dd3181090f03 100644 --- a/test/lib/browsers/playwright.ts +++ b/test/lib/browsers/playwright.ts @@ -47,7 +47,7 @@ export class Playwright extends BrowserInterface { this.eventCallbacks[event]?.delete(cb) } - async setup(browserName: string, locale?: string) { + async setup(browserName: string, locale?: string, userAgent?: string) { if (browser) return const headless = !!process.env.HEADLESS let device @@ -69,7 +69,7 @@ export class Playwright extends BrowserInterface { } else { browser = await chromium.launch({ headless, devtools: !headless }) } - context = await browser.newContext({ locale, ...device }) + context = await browser.newContext({ locale, ...device, userAgent }) } async get(url: string): Promise { diff --git a/test/lib/browsers/selenium.ts b/test/lib/browsers/selenium.ts index e6e8c1aa001e2f9..f785d0147cf3407 100644 --- a/test/lib/browsers/selenium.ts +++ b/test/lib/browsers/selenium.ts @@ -46,7 +46,7 @@ export class Selenium extends BrowserInterface { private browserName: string // TODO: support setting locale - async setup(browserName: string, locale?: string) { + async setup(browserName: string, locale?: string, userAgent?: string) { if (browser) return this.browserName = browserName @@ -155,6 +155,11 @@ export class Selenium extends BrowserInterface { let firefoxOptions = new FireFoxOptions() let safariOptions = new SafariOptions() + if (userAgent) { + chromeOptions.addArguments(`user-agent="${userAgent}"`) + firefoxOptions.setPreference('general.useragent.override', userAgent) + } + if (HEADLESS) { const screenSize = { width: 1280, height: 720 } chromeOptions = chromeOptions.headless().windowSize(screenSize) as any diff --git a/test/lib/next-webdriver.ts 
b/test/lib/next-webdriver.ts index 72fdcb2445aa522..b3bf0e2ab0381f4 100644 --- a/test/lib/next-webdriver.ts +++ b/test/lib/next-webdriver.ts @@ -61,6 +61,7 @@ export default async function webdriver( disableCache?: boolean beforePageLoad?: (page: any) => void locale?: string + userAgent?: string } ): Promise { let CurrentInterface: typeof BrowserInterface @@ -92,7 +93,7 @@ export default async function webdriver( const browser = new CurrentInterface() const browserName = process.env.BROWSER_NAME || 'chrome' - await browser.setup(browserName, locale) + await browser.setup(browserName, locale, userAgent) ;(global as any).browserName = browserName const fullUrl = getFullUrl(