Skip to content

Commit

Permalink
Don't execute prefetches for bot user agents (#40435)
Browse files Browse the repository at this point in the history
Such bots typically navigate websites using hard navigations (as they
crawl one URL at a time). Consequently, they do not benefit from
prefetches at all, while the prefetches increase the cost of both the
crawl and operating the site.

<!--
Thanks for opening a PR! Your contribution is much appreciated.
In order to make sure your PR is handled as smoothly as possible we
request that you follow the checklist sections below.
Choose the right checklist for the change that you're making:
-->

## Bug

- [ ] Related issues linked using `fixes #number`
- [x] Integration tests added
- [ ] Errors have helpful link attached, see `contributing.md`
  • Loading branch information
cramforce committed Sep 13, 2022
1 parent 8bf6a87 commit 421029c
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 7 deletions.
3 changes: 2 additions & 1 deletion packages/next/server/base-server.ts
Expand Up @@ -48,7 +48,8 @@ import Router from './router'

import { setRevalidateHeaders } from './send-payload/revalidate-headers'
import { execOnce } from '../shared/lib/utils'
import { isBlockedPage, isBot } from './utils'
import { isBlockedPage } from './utils'
import { isBot } from '../shared/lib/router/utils/is-bot'
import RenderResult from './render-result'
import { removeTrailingSlash } from '../shared/lib/router/utils/remove-trailing-slash'
import { denormalizePagePath } from '../shared/lib/page-path/denormalize-page-path'
Expand Down
6 changes: 0 additions & 6 deletions packages/next/server/utils.ts
Expand Up @@ -17,12 +17,6 @@ export function cleanAmpPath(pathname: string): string {
return pathname
}

/**
 * Reports whether a user-agent string mentions one of the crawler or
 * link-preview agents listed in the pattern below.
 *
 * The pattern is unanchored and case-insensitive, so a listed token matches
 * anywhere inside the user-agent string, in any letter case.
 *
 * @param userAgent - The raw user-agent string to inspect.
 * @returns `true` when any listed token occurs in `userAgent`, else `false`.
 */
export function isBot(userAgent: string): boolean {
  const botSignature =
    /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i
  return botSignature.test(userAgent)
}

export function isTargetLikeServerless(target: string) {
const isServerless = target === 'serverless'
const isServerlessTrace = target === 'experimental-serverless-trace'
Expand Down
7 changes: 7 additions & 0 deletions packages/next/shared/lib/router/router.ts
Expand Up @@ -47,6 +47,7 @@ import { hasBasePath } from '../../../client/has-base-path'
import { getNextPathnameInfo } from './utils/get-next-pathname-info'
import { formatNextPathnameInfo } from './utils/format-next-pathname-info'
import { compareRouterStates } from './utils/compare-states'
import { isBot } from './utils/is-bot'

declare global {
interface Window {
Expand Down Expand Up @@ -2171,6 +2172,12 @@ export default class Router implements BaseRouter {
asPath: string = url,
options: PrefetchOptions = {}
): Promise<void> {
if (typeof window !== 'undefined' && isBot(window.navigator.userAgent)) {
// No prefetches for bots that render the link since they are typically navigating
// links via the equivalent of a hard navigation and hence never utilize these
// prefetches.
return
}
let parsed = parseRelativeUrl(url)

let { pathname, query } = parsed
Expand Down
5 changes: 5 additions & 0 deletions packages/next/shared/lib/router/utils/is-bot.ts
@@ -0,0 +1,5 @@
/**
 * Tokens that identify the crawler and link-preview user agents recognised
 * by `isBot`. The pattern is unanchored and case-insensitive; it carries no
 * `g`/`y` flag, so sharing one instance across calls keeps `test()` stateless.
 */
const KNOWN_BOT_UA =
  /Googlebot|Mediapartners-Google|AdsBot-Google|googleweblight|Storebot-Google|Google-PageRenderer|Bingbot|BingPreview|Slurp|DuckDuckBot|baiduspider|yandex|sogou|LinkedInBot|bitlybot|tumblr|vkShare|quora link preview|facebookexternalhit|facebookcatalog|Twitterbot|applebot|redditbot|Slackbot|Discordbot|WhatsApp|SkypeUriPreview|ia_archiver/i

/**
 * Checks a user-agent string against the list of known bot tokens.
 *
 * @param userAgent - The raw user-agent string to inspect.
 * @returns `true` if any known bot token appears anywhere in `userAgent`
 *   (ignoring letter case), otherwise `false`.
 */
export function isBot(userAgent: string): boolean {
  return KNOWN_BOT_UA.test(userAgent)
}
22 changes: 22 additions & 0 deletions test/integration/preload-viewport/pages/bot-user-agent.js
@@ -0,0 +1,22 @@
import Head from 'next/head'
import Link from 'next/link'

// Test page: an inline script in <Head> redefines `navigator.userAgent` to the
// value of the `useragent` query-string parameter, and the body renders a
// single link to /another.
// NOTE(review): presumably the override has to run before the router reads
// `navigator.userAgent` for its bot check — confirm the script ordering.
export default () => {
return (
<div>
<Head>
<script
dangerouslySetInnerHTML={{
__html: `Object.defineProperty(navigator, 'userAgent', {
value: new URLSearchParams(location.search).get("useragent"),
});`,
}}
></script>
</Head>
<br />
<Link href="/another">
<a id="link-another">to /another</a>
</Link>
</div>
)
}
32 changes: 32 additions & 0 deletions test/integration/preload-viewport/test/index.test.js
Expand Up @@ -110,6 +110,38 @@ describe('Prefetching Links in viewport', () => {
}
})

// Loads the bot-user-agent page with a desktop Chrome user agent and expects
// exactly one `link[rel=prefetch]` element in the document.
// NOTE(review): the count is read right after page load with no retry —
// confirm the prefetch link is reliably present by then.
it('should prefetch with non-bot UA', async () => {
  let browser
  try {
    const regularUserAgent =
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
    const pagePath = `/bot-user-agent?useragent=${encodeURIComponent(
      regularUserAgent
    )}`
    browser = await webdriver(appPort, pagePath)
    const prefetchLinks = await browser.elementsByCss('link[rel=prefetch]')
    expect(prefetchLinks).toHaveLength(1)
  } finally {
    // Close the browser only if it was actually opened.
    if (browser) await browser.close()
  }
})

// Loads the bot-user-agent page with a Googlebot user agent and expects no
// `link[rel=prefetch]` elements in the document.
// NOTE(review): a zero count read immediately after load could also be seen
// before any prefetch would have been issued — confirm this cannot pass
// vacuously.
it('should not prefetch with bot UA', async () => {
  let browser
  try {
    const botUserAgent =
      'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/W.X.Y.Z Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
    const pagePath = `/bot-user-agent?useragent=${encodeURIComponent(
      botUserAgent
    )}`
    browser = await webdriver(appPort, pagePath)
    const prefetchLinks = await browser.elementsByCss('link[rel=prefetch]')
    expect(prefetchLinks).toHaveLength(0)
  } finally {
    // Close the browser only if it was actually opened.
    if (browser) await browser.close()
  }
})

it('should prefetch rewritten href with link in viewport onload', async () => {
let browser
try {
Expand Down

0 comments on commit 421029c

Please sign in to comment.