diff --git a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts index c080c8b028d4..f1f152ecb137 100644 --- a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts +++ b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts @@ -5,7 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -import React from 'react'; +import {createElement} from 'react'; import {fromPartial} from '@total-typescript/shoehorn'; import createSitemap from '../createSitemap'; import type {PluginOptions} from '../options'; @@ -84,6 +84,53 @@ describe('createSitemap', () => { expect(sitemap).not.toContain('/tags'); }); + it('excludes items that createSitemapItems configures to be ignored', async () => { + const sitemap = await createSitemap({ + siteConfig, + routes: routes([ + '/', + '/search/', + '/tags/', + '/search/foo', + '/tags/foo/bar', + ]), + head: {}, + options: { + ...options, + createSitemapItems: async (params) => { + const {defaultCreateSitemapItems, ...rest} = params; + const sitemapItems = await defaultCreateSitemapItems(rest); + const sitemapsWithoutPageAndTags = sitemapItems.filter( + (sitemapItem) => + !sitemapItem.url.includes('/tags/') && + !sitemapItem.url.endsWith('/search/'), + ); + return sitemapsWithoutPageAndTags; + }, + }, + }); + + expect(sitemap).not.toContain('/search/'); + expect(sitemap).toContain('/search/foo'); + expect(sitemap).not.toContain('/tags'); + }); + + it('returns null when createSitemapItems returns no items', async () => { + const sitemap = await createSitemap({ + siteConfig, + routes: routes(['/', '/docs/myDoc/', '/blog/post']), + head: {}, + options: { + ...options, + createSitemapItems: async () => { + return []; + }, + }, + }); + + expect(sitemap).toBeNull(); + }); + it('keep trailing slash unchanged', async () => { const sitemap = await createSitemap({ siteConfig, @@ -140,7 +187,7 @@ 
describe('createSitemap', () => { meta: { // @ts-expect-error: bad lib def toComponent: () => [ - React.createElement('meta', { + createElement('meta', { name: 'robots', content: 'NoFolloW, NoiNDeX', }), @@ -164,7 +211,7 @@ describe('createSitemap', () => { meta: { // @ts-expect-error: bad lib def toComponent: () => [ - React.createElement('meta', {name: 'robots', content: 'noindex'}), + createElement('meta', {name: 'robots', content: 'noindex'}), ], }, }, @@ -172,7 +219,7 @@ describe('createSitemap', () => { meta: { // @ts-expect-error: bad lib def toComponent: () => [ - React.createElement('meta', {name: 'robots', content: 'noindex'}), + createElement('meta', {name: 'robots', content: 'noindex'}), ], }, }, diff --git a/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts b/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts index 7b3cac21b367..dcea70b6a7e6 100644 --- a/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts +++ b/packages/docusaurus-plugin-sitemap/src/__tests__/options.test.ts @@ -249,4 +249,44 @@ describe('validateOptions', () => { ); }); }); + + describe('createSitemapItems', () => { + it('accept createSitemapItems undefined', () => { + const userOptions: Options = { + createSitemapItems: undefined, + }; + expect(testValidate(userOptions)).toEqual(defaultOptions); + }); + + it('accept createSitemapItems valid', () => { + const userOptions: Options = { + createSitemapItems: async (params) => { + const {defaultCreateSitemapItems, ...rest} = params; + const sitemapItems = await defaultCreateSitemapItems(rest); + const sitemapsWithoutPageAndTags = sitemapItems.filter( + (sitemapItem) => + !sitemapItem.url.includes('/tags/') && + !sitemapItem.url.includes('/page/'), + ); + return sitemapsWithoutPageAndTags; + }, + }; + expect(testValidate(userOptions)).toEqual({ + ...defaultOptions, + ...userOptions, + }); + }); + + it('rejects createSitemapItems bad input type', () => { + const userOptions: Options = { + // 
@ts-expect-error: test + createSitemapItems: 'not a function', + }; + expect(() => + testValidate(userOptions), + ).toThrowErrorMatchingInlineSnapshot( + `""createSitemapItems" must be of type function"`, + ); + }); + }); }); diff --git a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts index 1f7790db7924..f3f3aace18bc 100644 --- a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts +++ b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts @@ -5,57 +5,14 @@ * LICENSE file in the root directory of this source tree. */ -import type {ReactElement} from 'react'; import {createMatcher, flattenRoutes} from '@docusaurus/utils'; import {sitemapItemsToXmlString} from './xml'; import {createSitemapItem} from './createSitemapItem'; -import type {SitemapItem} from './types'; -import type {DocusaurusConfig, RouteConfig} from '@docusaurus/types'; -import type {HelmetServerState} from 'react-helmet-async'; +import {isNoIndexMetaRoute} from './head'; +import type {CreateSitemapItemsFn, CreateSitemapItemsParams} from './types'; +import type {RouteConfig} from '@docusaurus/types'; import type {PluginOptions} from './options'; - -type CreateSitemapParams = { - siteConfig: DocusaurusConfig; - routes: RouteConfig[]; - head: {[location: string]: HelmetServerState}; - options: PluginOptions; -}; - -// Maybe we want to add a routeConfig.metadata.noIndex instead? -// But using Helmet is more reliable for third-party plugins... 
-function isNoIndexMetaRoute({ - head, - route, -}: { - head: {[location: string]: HelmetServerState}; - route: string; -}) { - const isNoIndexMetaTag = ({ - name, - content, - }: { - name?: string; - content?: string; - }): boolean => { - if (!name || !content) { - return false; - } - return ( - // meta name is not case-sensitive - name.toLowerCase() === 'robots' && - // Robots directives are not case-sensitive - content.toLowerCase().includes('noindex') - ); - }; - - // https://github.com/staylor/react-helmet-async/pull/167 - const meta = head[route]?.meta.toComponent() as unknown as - | ReactElement<{name?: string; content?: string}>[] - | undefined; - return meta?.some((tag) => - isNoIndexMetaTag({name: tag.props.name, content: tag.props.content}), - ); -} +import type {HelmetServerState} from 'react-helmet-async'; // Not all routes should appear in the sitemap, and we should filter: // - parent routes, used for layouts @@ -75,32 +32,57 @@ function getSitemapRoutes({routes, head, options}: CreateSitemapParams) { return flattenRoutes(routes).filter((route) => !isRouteExcluded(route)); } -async function createSitemapItems( - params: CreateSitemapParams, -): Promise { - const sitemapRoutes = getSitemapRoutes(params); - if (sitemapRoutes.length === 0) { - return []; - } - return Promise.all( - sitemapRoutes.map((route) => - createSitemapItem({ - route, - siteConfig: params.siteConfig, - options: params.options, - }), - ), - ); +// Our default implementation receives some additional parameters on purpose +// Params such as "head" are "messy" and not directly exposed to the user +function createDefaultCreateSitemapItems( + internalParams: Pick, +): CreateSitemapItemsFn { + return async (params) => { + const sitemapRoutes = getSitemapRoutes({...params, ...internalParams}); + if (sitemapRoutes.length === 0) { + return []; + } + return Promise.all( + sitemapRoutes.map((route) => + createSitemapItem({ + route, + siteConfig: params.siteConfig, + options: 
internalParams.options, + }), + ), + ); + }; } +type CreateSitemapParams = CreateSitemapItemsParams & { + head: {[location: string]: HelmetServerState}; + options: PluginOptions; +}; + export default async function createSitemap( params: CreateSitemapParams, ): Promise { - const items = await createSitemapItems(params); - if (items.length === 0) { + const {head, options, routes, siteConfig} = params; + + const defaultCreateSitemapItems: CreateSitemapItemsFn = + createDefaultCreateSitemapItems({head, options}); + + const sitemapItems = params.options.createSitemapItems + ? await params.options.createSitemapItems({ + routes, + siteConfig, + defaultCreateSitemapItems, + }) + : await defaultCreateSitemapItems({ + routes, + siteConfig, + }); + + if (sitemapItems.length === 0) { return null; } - const xmlString = await sitemapItemsToXmlString(items, { + + const xmlString = await sitemapItemsToXmlString(sitemapItems, { lastmod: params.options.lastmod, }); return xmlString; diff --git a/packages/docusaurus-plugin-sitemap/src/head.ts b/packages/docusaurus-plugin-sitemap/src/head.ts new file mode 100644 index 000000000000..ed16fdf85234 --- /dev/null +++ b/packages/docusaurus-plugin-sitemap/src/head.ts @@ -0,0 +1,47 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import type {ReactElement} from 'react'; +import type {HelmetServerState} from 'react-helmet-async'; + +// Maybe we want to add a routeConfig.metadata.noIndex instead? +// But using Helmet is more reliable for third-party plugins... 
+export function isNoIndexMetaRoute({ + head, + route, +}: { + head: {[location: string]: HelmetServerState}; + route: string; +}): boolean { + const isNoIndexMetaTag = ({ + name, + content, + }: { + name?: string; + content?: string; + }): boolean => { + if (!name || !content) { + return false; + } + return ( + // meta name is not case-sensitive + name.toLowerCase() === 'robots' && + // Robots directives are not case-sensitive + content.toLowerCase().includes('noindex') + ); + }; + + // https://github.com/staylor/react-helmet-async/pull/167 + const meta = head[route]?.meta.toComponent() as unknown as + | ReactElement<{name?: string; content?: string}>[] + | undefined; + return ( + meta?.some((tag) => + isNoIndexMetaTag({name: tag.props.name, content: tag.props.content}), + ) ?? false + ); +} diff --git a/packages/docusaurus-plugin-sitemap/src/options.ts b/packages/docusaurus-plugin-sitemap/src/options.ts index e6d4c94e9a40..a05a1c74b8f3 100644 --- a/packages/docusaurus-plugin-sitemap/src/options.ts +++ b/packages/docusaurus-plugin-sitemap/src/options.ts @@ -8,7 +8,13 @@ import {Joi} from '@docusaurus/utils-validation'; import {ChangeFreqList, LastModOptionList} from './types'; import type {OptionValidationContext} from '@docusaurus/types'; -import type {ChangeFreq, LastModOption} from './types'; +import type { + ChangeFreq, + LastModOption, + SitemapItem, + CreateSitemapItemsFn, + CreateSitemapItemsParams, +} from './types'; export type PluginOptions = { /** @@ -44,8 +50,17 @@ export type PluginOptions = { * @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions */ priority: number | null; + + /** Allow control over the construction of SitemapItems */ + createSitemapItems?: CreateSitemapItemsOption; }; +type CreateSitemapItemsOption = ( + params: CreateSitemapItemsParams & { + defaultCreateSitemapItems: CreateSitemapItemsFn; + }, +) => Promise; + export type Options = Partial; export const DEFAULT_OPTIONS: PluginOptions = { @@ -90,6 +105,8 @@ const 
PluginOptionSchema = Joi.object({ .valid(null, ...LastModOptionList) .default(DEFAULT_OPTIONS.lastmod), + createSitemapItems: Joi.function(), + ignorePatterns: Joi.array() .items(Joi.string()) .default(DEFAULT_OPTIONS.ignorePatterns), diff --git a/packages/docusaurus-plugin-sitemap/src/types.ts b/packages/docusaurus-plugin-sitemap/src/types.ts index f959ca09018b..ca4536b173b7 100644 --- a/packages/docusaurus-plugin-sitemap/src/types.ts +++ b/packages/docusaurus-plugin-sitemap/src/types.ts @@ -5,6 +5,8 @@ * LICENSE file in the root directory of this source tree. */ +import type {DocusaurusConfig, RouteConfig} from '@docusaurus/types'; + export const LastModOptionList = ['date', 'datetime'] as const; export type LastModOption = (typeof LastModOptionList)[number]; @@ -65,3 +67,12 @@ export type SitemapItem = { */ priority?: number | null; }; + +export type CreateSitemapItemsParams = { + siteConfig: DocusaurusConfig; + routes: RouteConfig[]; +}; + +export type CreateSitemapItemsFn = ( + params: CreateSitemapItemsParams, +) => Promise; diff --git a/website/docs/api/plugins/plugin-sitemap.mdx b/website/docs/api/plugins/plugin-sitemap.mdx index 41f240839454..75ca74ef8b70 100644 --- a/website/docs/api/plugins/plugin-sitemap.mdx +++ b/website/docs/api/plugins/plugin-sitemap.mdx @@ -44,11 +44,24 @@ Accepted fields: | `priority` | `number \| null` | `0.5` | See [sitemap docs](https://www.sitemaps.org/protocol.html#xmlTagDefinitions) | | `ignorePatterns` | `string[]` | `[]` | A list of glob patterns; matching route paths will be filtered from the sitemap. Note that you may need to include the base URL in here. | | `filename` | `string` | `sitemap.xml` | The path to the created sitemap file, relative to the output directory. Useful if you have two plugin instances outputting two files. 
| +| `createSitemapItems` | [CreateSitemapItemsFn](#CreateSitemapItemsFn) \| undefined | `undefined` | An optional function which can be used to transform and / or filter the items in the sitemap. | ```mdx-code-block ``` +### Types {#types} + +#### `CreateSitemapItemsFn` {#CreateSitemapItemsFn} + +```ts +type CreateSitemapItemsFn = (params: { + siteConfig: DocusaurusConfig; + routes: RouteConfig[]; + defaultCreateSitemapItems: CreateSitemapItemsFn; +}) => Promise<SitemapItem[]>; +``` + :::info This plugin also respects some site config: @@ -86,6 +99,11 @@ const config = { priority: 0.5, ignorePatterns: ['/tags/**'], filename: 'sitemap.xml', + createSitemapItems: async (params) => { + const {defaultCreateSitemapItems, ...rest} = params; + const items = await defaultCreateSitemapItems(rest); + return items.filter((item) => !item.url.includes('/page/')); + }, }; ```