Extend file type (updated) #603

Merged

Changes from 8 commits

Commits (36)

bda3f46  Allow specification of custom detectors + readme update (Jul 14, 2023)
6007eff  Simplify logic in runCustomDetectors (Jul 14, 2023)
c3dba6e  add custom detectors to fileTypeFromStream (Jul 14, 2023)
fab97ae  fix linting issue (Jul 14, 2023)
c7c3190  Execute custom detectors before default ones (Jul 17, 2023)
4bcddff  add tests (Jul 17, 2023)
733bfac  fix docs (Jul 17, 2023)
37e1e57  compatibility with Node.js 14 and 16 (Jul 17, 2023)
bfd18b1  Remove blank space (FredrikSchaefer, Jul 25, 2023)
ee4cb2c  Wrap custom detectors into file type options (Jul 25, 2023)
ad6d44f  Merge branch 'extend-file-type-updated' of github.com:FredrikSchaefer… (Jul 25, 2023)
29930bf  Adjust fileTypeFromFile(...) to recent changes (FredrikSchaefer, Jul 25, 2023)
7ea6efd  Moved custom detectors from function to constructor argument (Oct 17, 2023)
748ffee  fix fileTypeStream (add back fileTypeOptions) (Oct 17, 2023)
2adec69  Update documentation (Oct 17, 2023)
0d1464c  add check for illegal tokenizer position change (Oct 18, 2023)
6b6188c  Update core.d.ts (FredrikSchaefer, Oct 23, 2023)
6806753  Update core.d.ts (FredrikSchaefer, Oct 23, 2023)
61e052e  Update readme.md (move custom detectors section as suggested by revie… (Oct 23, 2023)
eed198d  Remove fileType prefix from class member functions (Oct 24, 2023)
ff84f3e  Make runCustomDetectors private (Oct 24, 2023)
326ccd1  Add class based approach to fileTypeStream (Oct 24, 2023)
011fa53  Change error handling for read operations of custom detectors (Oct 25, 2023)
b346f7c  Remove obsolete @throws from documentation (Oct 25, 2023)
9e24ed9  Make usage of FileTypeParser class consistent (Oct 25, 2023)
a926bf2  Rename stream(...) to toDetectingStream(...) (Oct 25, 2023)
5e2a0fd  Fix error handling (Oct 25, 2023)
f38565d  Suggested changes to simplify code (Borewit, Oct 25, 2023)
e25c294  Merge pull request #2 from sindresorhus/extend-file-type-updated-sugg… (FredrikSchaefer, Oct 25, 2023)
080ac75  Fix TypeScript declaration (Oct 25, 2023)
de706c5  Remove comments from unit tests and redundant empty line (Borewit, Nov 6, 2023)
331502d  Make code examples executable. (Borewit, Nov 6, 2023)
9d85f05  Remove empty comment lines (Borewit, Nov 6, 2023)
ede94d9  Remove unused `fileTypeOptions` parameter from typings (Borewit, Nov 6, 2023)
ca6e449  Adjust number code and comment style suggestions (Borewit, Nov 10, 2023)
a50e37a  Update core.d.ts (sindresorhus, Nov 10, 2023)
50 changes: 45 additions & 5 deletions core.d.ts
@@ -318,6 +318,38 @@ export type ReadableStreamWithFileType = ReadableStream & {
readonly fileType?: FileTypeResult;
};

/**
Function that allows specifying custom detection mechanisms.

An iterable of detectors can be provided as an argument to the file type detection methods.

The detectors are called before the default detections in the provided order.

Custom detectors can be used to add new `FileTypeResult` entries or to modify the return behaviour of existing `FileTypeResult` detections.

If the detector returns `undefined`, the `tokenizer.position` should be 0 (unless it's a stream). That allows other detectors to parse the file.
Collaborator

Not sure if I agree with the "unless it's a stream". Essentially you can iterate to other detectors if you took a bite of the apple. Only peek is allowed; if you read, you have consumed the tokenizer, which is very similar to a stream.

I fear this is an area where we can expect a lot of questions from users.

Contributor Author

Hey, thanks for the comment.

Yeah, I guess you're right that we can expect a lot of questions.

I just mindlessly took this information from this previous discussion.

Let me suggest a more detailed explanation here:

> If the detector returns `undefined`, the `tokenizer.position` should typically be 0. This allows easy parsing by other detectors, unless subsequent custom detectors specify otherwise. Additionally, the detector shouldn't consume the tokenizer; while peeking is non-consuming, reading is.

What do you think of this?

I'm really open to anything here!

Collaborator

See also my other comment: https://github.com/sindresorhus/file-type/pull/603/files#r1356979704

I suggest something like this:

> If the detector returns `no_match`, it is not allowed to read from the tokenizer (the `tokenizer.position` must remain 0); otherwise the following scanners will read from the wrong file offset.

> If the detector returns `undefined`, the scanner is certain the file type cannot be determined, not even by other scanners.

`no_match` represents option 1 explained here.

Contributor Author

I agree with your point that custom detectors should be able to interrupt detection. However, I see two small downsides of the suggested approach:

  1. The wording `no_match` does not really make it clear to me whether it means no match for this detector, or no match at all.

  2. The standard `FileTypeParser` returns `undefined` when no file type could be recognized. Therefore, requiring the custom detectors to return something else is a bit counterintuitive.

I therefore suggest doing it the other way around:

> If the detector returns `undefined`, it is not allowed to read from the tokenizer (the `tokenizer.position` must remain 0); otherwise the following scanners will read from the wrong file offset.

> If the detector returns `file_type_undetectable`, the detector is certain the file type cannot be determined, not even by other scanners. The `FileTypeParser` interrupts the parsing and immediately returns `undefined`.

Contributor Author

Okay, one could argue that `file_type_undetectable` also does not clearly say whether it means the file type is undetectable for this detector or for all detectors, but it still makes it a bit clearer in my opinion.

Collaborator

> I agree with your point that custom detectors should be able to interrupt detection. However, I see two small downsides of the suggested approach:
>
> 1. The wording `no_match` does not really make it clear to me whether it means no match for this detector, or no match at all.
>
> 2. The standard `FileTypeParser` returns `undefined` when no file type could be recognized. Therefore, requiring the custom detectors to return something else is a bit counterintuitive.
>
> I therefore suggest doing it the other way around:
>
> If the detector returns `undefined`, it is not allowed to read from the tokenizer (the `tokenizer.position` must remain 0); otherwise the following scanners will read from the wrong file offset.
>
> If the detector returns `file_type_undetectable`, the detector is certain the file type cannot be determined, not even by other scanners. The `FileTypeParser` interrupts the parsing and immediately returns `undefined`.

Sounds good to me. The second case can also be something like: the detector started reading but for some reason failed to determine the file type. Not ideal, but it can happen. If the detector starts reading, there is no way back.

We could also check the position after each custom scanner. It may not actually be 0; there is also an iterated use case with the ID3 header. The position should remain unchanged.

Contributor Author

Good idea! Just pushed a commit taking care of that check.
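
For illustration, a minimal sketch of the kind of position check discussed above; this is not the code added in the PR, and the function name and error message are placeholders:

```
async function runDetectorsWithPositionCheck(tokenizer, detectors) {
	for (const detector of detectors) {
		const initialPosition = tokenizer.position;
		const fileType = await detector(tokenizer);
		if (fileType) {
			return fileType;
		}

		// A detector that reports no match must not have consumed the tokenizer.
		if (tokenizer.position !== initialPosition) {
			throw new Error('Custom detector changed the tokenizer position without returning a file type');
		}
	}

	return undefined;
}
```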


Example detector array, which can be extended and provided as an argument to each public method:

const customDetectors = [
	async tokenizer => {
		const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" as decimal string
		const buffer = Buffer.alloc(7);
		await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});
		if (unicornHeader.every((value, index) => value === buffer[index])) {
			return {ext: 'unicorn', mime: 'application/unicorn'};
		}

		return undefined;
	},
];

@param tokenizer - An [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) usable as source of the examined file.
@param fileType - FileTypeResult detected by the standard detections or a previous custom detection. Undefined if no matching fileTypeResult could be found.
@returns supposedly detected file extension and MIME type as a FileTypeResult-like object, or `undefined` when there is no match.
*/
export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;

/**
Detect the file type of a `Buffer`, `Uint8Array`, or `ArrayBuffer`.

@@ -326,19 +358,21 @@ The file type is detected by checking the [magic number](https://en.wikipedia.or
If file access is available, it is recommended to use `.fromFile()` instead.

@param buffer - An Uint8Array or Buffer representing file data. It works best if the buffer contains the entire file, it may work with a smaller portion as well.
@param customDetectors - Optional: An Iterable of Detector functions. They are called in the order provided.
@returns The detected file type and MIME type, or `undefined` when there is no match.
*/
export function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;
export function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer, customDetectors?: Iterable<Detector>): Promise<FileTypeResult | undefined>;

/**
Detect the file type of a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

@param stream - A readable stream representing file data.
@param customDetectors - Optional: An Iterable of Detector functions. They are called in the order provided.
@returns The detected file type and MIME type, or `undefined` when there is no match.
*/
export function fileTypeFromStream(stream: ReadableStream): Promise<FileTypeResult | undefined>;
export function fileTypeFromStream(stream: ReadableStream, customDetectors?: Iterable<Detector>): Promise<FileTypeResult | undefined>;

/**
Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.
@@ -348,6 +382,7 @@ This method is used internally, but can also be used for a special "tokenizer" r
A tokenizer propagates the internal read functions, allowing alternative transport mechanisms, to access files, to be implemented and used.

@param tokenizer - File source implementing the tokenizer interface.
@param customDetectors - Optional: An Iterable of Detector functions. They are called in the order provided.
@returns The detected file type and MIME type, or `undefined` when there is no match.

An example is [`@tokenizer/http`](https://github.com/Borewit/tokenizer-http), which requests data using [HTTP-range-requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests). A difference with a conventional stream and the [*tokenizer*](https://github.com/Borewit/strtok3#tokenizer), is that it is able to *ignore* (seek, fast-forward) in the stream. For example, you may only need and read the first 6 bytes, and the last 128 bytes, which may be an advantage in case reading the entire file would take longer.
@@ -366,7 +401,7 @@ console.log(fileType);
//=> {ext: 'mp3', mime: 'audio/mpeg'}
```
*/
export function fileTypeFromTokenizer(tokenizer: ITokenizer): Promise<FileTypeResult | undefined>;
export function fileTypeFromTokenizer(tokenizer: ITokenizer, customDetectors?: Iterable<Detector>): Promise<FileTypeResult | undefined>;

/**
Supported file extensions.
@@ -399,6 +434,7 @@ A smaller sample size will result in lower probability of the best file type det
**Note:** Requires Node.js 14 or later.

@param readableStream - A [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable) containing a file to examine.
@param customDetectors - Optional: An Iterable of Detector functions. They are called in the order provided.
@returns A `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.

@example
@@ -416,11 +452,15 @@ if (stream2.fileType?.mime === 'image/jpeg') {
}
```
*/
export function fileTypeStream(readableStream: ReadableStream, options?: StreamOptions): Promise<ReadableStreamWithFileType>;
export function fileTypeStream(readableStream: ReadableStream, options?: StreamOptions, customDetectors?: Iterable<Detector>): Promise<ReadableStreamWithFileType>;

/**
Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob).

@param blob
@param customDetectors - Optional: An Iterable of Detector functions. They are called in the order provided.
@returns The detected file type and MIME type, or `undefined` when there is no match.

@example
```
import {fileTypeFromBlob} from 'file-type';
Expand All @@ -434,4 +474,4 @@ console.log(await fileTypeFromBlob(blob));
//=> {ext: 'txt', mime: 'plain/text'}
```
*/
export declare function fileTypeFromBlob(blob: Blob): Promise<FileTypeResult | undefined>;
export declare function fileTypeFromBlob(blob: Blob, customDetectors?: Iterable<Detector>): Promise<FileTypeResult | undefined>;
38 changes: 26 additions & 12 deletions core.js
@@ -10,16 +10,16 @@ import {extensions, mimeTypes} from './supported.js';

const minimumBytes = 4100; // A fair amount of file-types are detectable within this range.

export async function fileTypeFromStream(stream) {
export async function fileTypeFromStream(stream, customDetectors) {
const tokenizer = await strtok3.fromStream(stream);
try {
return await fileTypeFromTokenizer(tokenizer);
return await fileTypeFromTokenizer(tokenizer, customDetectors);
} finally {
await tokenizer.close();
}
}

export async function fileTypeFromBuffer(input) {
export async function fileTypeFromBuffer(input, customDetectors) {
if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) {
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`Buffer\` or \`ArrayBuffer\`, got \`${typeof input}\``);
}
@@ -30,12 +30,12 @@ export async function fileTypeFromBuffer(input) {
return;
}

return fileTypeFromTokenizer(strtok3.fromBuffer(buffer));
return fileTypeFromTokenizer(strtok3.fromBuffer(buffer), customDetectors);
}

export async function fileTypeFromBlob(blob) {
export async function fileTypeFromBlob(blob, customDetectors) {
const buffer = await blob.arrayBuffer();
return fileTypeFromBuffer(new Uint8Array(buffer));
return fileTypeFromBuffer(new Uint8Array(buffer), customDetectors);
}

function _check(buffer, headers, options) {
@@ -59,9 +59,23 @@ function _check(buffer, headers, options) {
return true;
}

export async function fileTypeFromTokenizer(tokenizer) {
async function runCustomDetectors(tokenizer, detectors) {
if (detectors) {
for (const detector of detectors) {
const fileType = await detector(tokenizer);
if (fileType) {
return fileType;
}
}
}

return undefined;
}

export async function fileTypeFromTokenizer(tokenizer, customDetectors) {
try {
return new FileTypeParser().parse(tokenizer);
return await runCustomDetectors(tokenizer, customDetectors)
|| await new FileTypeParser().parse(tokenizer, customDetectors);
} catch (error) {
if (!(error instanceof strtok3.EndOfStreamError)) {
throw error;
@@ -78,7 +92,7 @@ class FileTypeParser {
return this.check(stringToBytes(header), options);
}

async parse(tokenizer) {
async parse(tokenizer, customDetectors) {
this.buffer = Buffer.alloc(minimumBytes);

// Keep reading until EOF if the file size is unknown.
@@ -211,7 +225,7 @@
}

await tokenizer.ignore(id3HeaderLength);
return fileTypeFromTokenizer(tokenizer); // Skip ID3 header, recursion
return fileTypeFromTokenizer(tokenizer, customDetectors); // Skip ID3 header, recursion
}

// Musepack, SV7
@@ -1602,7 +1616,7 @@ class FileTypeParser {
}
}

export async function fileTypeStream(readableStream, {sampleSize = minimumBytes} = {}) {
export async function fileTypeStream(readableStream, {sampleSize = minimumBytes} = {}, customDetectors) {
const {default: stream} = await import('node:stream');

return new Promise((resolve, reject) => {
@@ -1618,7 +1632,7 @@ export async function fileTypeStream(readableStream, {sampleSize = minimumBytes}
// Read the input stream and detect the filetype
const chunk = readableStream.read(sampleSize) ?? readableStream.read() ?? Buffer.alloc(0);
try {
const fileType = await fileTypeFromBuffer(chunk);
const fileType = await fileTypeFromBuffer(chunk, customDetectors);
pass.fileType = fileType;
} catch (error) {
if (error instanceof strtok3.EndOfStreamError) {
1 change: 1 addition & 0 deletions fixture/fixture.unicorn
@@ -0,0 +1 @@
UNICORN FILE CONTENT
4 changes: 2 additions & 2 deletions index.js
@@ -1,10 +1,10 @@
import * as strtok3 from 'strtok3';
import {fileTypeFromTokenizer} from './core.js';

export async function fileTypeFromFile(path) {
export async function fileTypeFromFile(path, customDetectors) {
const tokenizer = await strtok3.fromFile(path);
try {
return await fileTypeFromTokenizer(tokenizer);
return await fileTypeFromTokenizer(tokenizer, customDetectors);
} finally {
await tokenizer.close();
}
98 changes: 92 additions & 6 deletions readme.md
@@ -105,7 +105,7 @@ console.log(fileType);

## API

### fileTypeFromBuffer(buffer)
### fileTypeFromBuffer(buffer, customDetectors)

Detect the file type of a `Buffer`, `Uint8Array`, or `ArrayBuffer`.

@@ -126,7 +126,13 @@ Type: `Buffer | Uint8Array | ArrayBuffer`

A buffer representing file data. It works best if the buffer contains the entire file, it may work with a smaller portion as well.

### fileTypeFromFile(filePath)
#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.

### fileTypeFromFile(filePath, customDetectors)

Detect the file type of a file path.

@@ -145,7 +151,14 @@ Type: `string`

The file path to parse.

### fileTypeFromStream(stream)
#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.


### fileTypeFromStream(stream, customDetectors)

Detect the file type of a Node.js [readable stream](https://nodejs.org/api/stream.html#stream_class_stream_readable).

@@ -164,7 +177,14 @@ Type: [`stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream

A readable stream representing file data.

### fileTypeFromBlob(blob)
#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.


### fileTypeFromBlob(blob, customDetectors)

Detect the file type of a [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob).

@@ -189,7 +209,18 @@ console.log(await fileTypeFromBlob(blob));
//=> {ext: 'txt', mime: 'plain/text'}
```

### fileTypeFromTokenizer(tokenizer)
#### blob

Type: [`Blob`](https://developer.mozilla.org/en-US/docs/Web/API/Blob)

#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.


### fileTypeFromTokenizer(tokenizer, customDetectors)

Detect the file type from an `ITokenizer` source.

@@ -248,7 +279,13 @@ Type: [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer)

A file source implementing the [tokenizer interface](https://github.com/Borewit/strtok3#tokenizer).

### fileTypeStream(readableStream, options?)
#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.

### fileTypeStream(readableStream, options?, customDetectors)

Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.

@@ -297,6 +334,13 @@ Type: [`stream.Readable`](https://nodejs.org/api/stream.html#stream_class_stream

The input stream.

#### customDetectors

Type: `Iterable<Detector>`

Optional: An Iterable of [Detector](#custom-detectors) functions. They are called in the order provided.


### supportedExtensions

Returns a `Set<string>` of supported file extensions.
@@ -469,6 +513,48 @@ The following file types will not be accepted:
- `.csv` - [Reason.](https://github.com/sindresorhus/file-type/issues/264#issuecomment-568439196)
- `.svg` - Detecting it requires a full-blown parser. Check out [`is-svg`](https://github.com/sindresorhus/is-svg) for something that mostly works.


## Custom detectors

A custom detector is a function that allows specifying custom detection mechanisms.

An iterable of detectors can be provided as an argument to the file type detection methods.

The detectors are called before the default detections in the provided order.

Custom detectors can be used to add new `FileTypeResult` entries or to modify the return behaviour of existing `FileTypeResult` detections.

If the detector returns `undefined`, the `tokenizer.position` should be 0 (unless it's a stream). That allows other detectors to parse the file.

Example detector array, which can be extended and provided as an argument to each public method:

```
const customDetectors = [
	async tokenizer => {
		const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" as decimal string
		const buffer = Buffer.alloc(7);
		await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});
		if (unicornHeader.every((value, index) => value === buffer[index])) {
			return {ext: 'unicorn', mime: 'application/unicorn'};
		}

		return undefined;
	},
];
```
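
The `customDetectors` array above can be passed to any of the detection methods. A minimal usage sketch with `fileTypeFromFile` and the `fixture/fixture.unicorn` file added in this PR (its content starts with the bytes `UNICORN`):

```
import {fileTypeFromFile} from 'file-type';

const fileType = await fileTypeFromFile('fixture/fixture.unicorn', customDetectors);

console.log(fileType);
//=> {ext: 'unicorn', mime: 'application/unicorn'}
```
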
#### tokenizer

Type: [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer)

Usable as source of the examined file.

#### fileType

Type: FileTypeResult

Object having an `ext` (extension) and `mime` (mime type) property.

Detected by the standard detections or a previous custom detection. Undefined if no matching fileTypeResult could be found.
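
The same array can also be passed to `fileTypeStream`, where the custom detectors are the third argument, after the options object. A sketch reusing `customDetectors` and the fixture file from above:

```
import fs from 'node:fs';
import {fileTypeStream} from 'file-type';

const stream = await fileTypeStream(fs.createReadStream('fixture/fixture.unicorn'), {}, customDetectors);

console.log(stream.fileType);
//=> {ext: 'unicorn', mime: 'application/unicorn'}
```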

## Related

- [file-type-cli](https://github.com/sindresorhus/file-type-cli) - CLI for this module