diff --git a/packages/adblocker-puppeteer/README.md b/packages/adblocker-puppeteer/README.md index a53db60e29..be28bd2d25 100644 --- a/packages/adblocker-puppeteer/README.md +++ b/packages/adblocker-puppeteer/README.md @@ -68,6 +68,14 @@ PuppeteerBlocker.fromPrebuiltAdsAndTracking(fetch).then((blocker) => { You are ready to block ads! +By default, using the ad blocker disables the browser's built-in resource cache. Pass the `cacheSafe` option to `enableBlockingInPage` to enable the cache. Learn more in [puppeteer's documentation](https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagesetrequestinterceptionvalue-cachesafe). Be careful when using the cache. You may have [problems loading some webpages](https://github.com/puppeteer/puppeteer/pull/6996). + +```javascript +PuppeteerBlocker.fromPrebuiltAdsAndTracking(fetch).then((blocker) => { + blocker.enableBlockingInPage(page, { cacheSafe: true }); +}); +``` + There are other ways you can *create an instance of the blocking engine* to start blocking ads. diff --git a/packages/adblocker-puppeteer/adblocker.ts b/packages/adblocker-puppeteer/adblocker.ts index 06ae4fed04..0f52123a43 100644 --- a/packages/adblocker-puppeteer/adblocker.ts +++ b/packages/adblocker-puppeteer/adblocker.ts @@ -62,7 +62,7 @@ export class BlockingContext { this.onRequest = (request) => blocker.onRequest(request); } - public async enable(): Promise { + public async enable({ cacheSafe }: { cacheSafe: boolean }): Promise { if (this.blocker.config.loadCosmeticFilters === true) { // Register callback to cosmetics injection (CSS + scriptlets) this.page.on('frameattached', this.onFrameNavigated); @@ -73,7 +73,7 @@ export class BlockingContext { if (this.blocker.config.loadNetworkFilters === true) { // Make sure request interception is enabled for `page` before proceeding - await this.page.setRequestInterception(true); + await this.page.setRequestInterception(true, cacheSafe); // NOTES: // - page.setBypassCSP(enabled) might be needed to perform // injections on some pages. @@ -109,7 +109,10 @@ export class PuppeteerBlocker extends FiltersEngine { // Helpers to enable and disable blocking for 'browser' // ----------------------------------------------------------------------- // - public async enableBlockingInPage(page: puppeteer.Page): Promise { + /** + * The `cacheSafe` can be set to `true` to enable the browser's resource cache. By default, caching is disabled, which is puppeteer's default behavior when request interception is enabled. Learn more here: https://github.com/puppeteer/puppeteer/blob/main/docs/api.md#pagesetrequestinterceptionvalue-cachesafe + */ + public async enableBlockingInPage(page: puppeteer.Page, { cacheSafe = false }: { cacheSafe?: boolean } = {}): Promise { let context: undefined | BlockingContext = this.contexts.get(page); if (context !== undefined) { return context; @@ -117,7 +120,7 @@ export class PuppeteerBlocker extends FiltersEngine { context = new BlockingContext(page, this); this.contexts.set(page, context); - await context.enable(); + await context.enable({ cacheSafe }); return context; } diff --git a/packages/adblocker-puppeteer/package.json b/packages/adblocker-puppeteer/package.json index 7ff708f2d4..6246063c99 100644 --- a/packages/adblocker-puppeteer/package.json +++ b/packages/adblocker-puppeteer/package.json @@ -45,10 +45,13 @@ "@types/chai": "^4.2.11", "@types/mocha": "^8.0.0", "chai": "^4.2.0", + "get-port": "^5.1.1", "mocha": "^8.0.1", + "node-fetch": "^2.6.1", "nyc": "^15.0.0", "puppeteer": "9.0.0", "rimraf": "^3.0.0", + "tmp-promise": "^3.0.2", "ts-node": "^9.0.0", "tslint": "^6.0.0", "tslint-config-prettier": "^1.18.0", diff --git a/packages/adblocker-puppeteer/test/adblocker.test.ts b/packages/adblocker-puppeteer/test/adblocker.test.ts index 10613741fb..3d6d249e8d 100644 --- a/packages/adblocker-puppeteer/test/adblocker.test.ts +++ b/packages/adblocker-puppeteer/test/adblocker.test.ts @@ -1,9 +1,14 @@ import { expect } from 'chai'; import 'mocha'; +import * as http from 'http'; +import { EventEmitter } from 'events' +import * as getPort from 'get-port' +import fetch from 'node-fetch' +import * as tmp from 'tmp-promise' import * as puppeteer from 'puppeteer'; -import { fromPuppeteerDetails, getHostnameHashesFromLabelsBackward } from '../adblocker'; +import { fromPuppeteerDetails, getHostnameHashesFromLabelsBackward, PuppeteerBlocker } from '../adblocker'; describe('#fromPuppeteerDetails', () => { const baseFrame: Partial = { @@ -36,3 +41,86 @@ describe('#fromPuppeteerDetails', () => { }); }); }); + +/** + * Overview of what these tests do: + * - Start a server that serves a basic webpage. The webpage loads just one resource after the initial request: a stylesheet with a `Cache-Control` header indicating that it should be cached. + * - Visit the webpage twice in a row, with or without the cache enabled, and count the total number of requests that the server handles to serve the visits to make sure the cache worked as intended. + * - Without the cache, there should be two server requests for each page visit. One for the main HTML and one for the CSS. + * - With the cache enabled, but starting empty, there should be two server requests on the first visit, and one on the second visit, after the CSS is cached. + * + * Chromium doesn't seem to allow entire HTML pages to be cached without revalidation, which is why the test uses a cached stylesheet, rather than trying to cache the main page. + */ +describe(`cacheSafe option`, () => { + const serverRequestEvents = new EventEmitter() // used to notify the test about server requests so the test can count requests + const requestReceivedEventName = `request received` + const cachingServer = http.createServer((request, response) => { + serverRequestEvents.emit(requestReceivedEventName) + + if (request.url == null) { + response.writeHead(400) + return response.end() + } + + if (request.url.endsWith(`/`)) { + response.end(` + + content + `) + } else if (request.url.endsWith(`/style.css`)) { + response.setHeader(`Cache-Control`, `max-age=3600`) + response.end(`* { key: value }`) + } + }) + /** Starts the server on a randomly-chosen, available port and returns the URL that can be used to reach the server. */ + const pendingServerUrl = new Promise(async (resolve, reject) => { + try { + const port = await getPort() + + cachingServer + .listen(port, () => resolve(`http://localhost:${port}`)) + .once(`error`, reject) + } catch(error: unknown) { + reject(error) + } + }) + const getRandomTemporaryDirectory = async (): Promise => (await tmp.dir({ unsafeCleanup: true })).path + + after(done => cachingServer.close(done)) + + it(`is disabled by default, and no browser caching happens`, async () => { + let serverRequestCount = 0 + serverRequestEvents.on(requestReceivedEventName, () => serverRequestCount += 1) + + const browser = await puppeteer.launch({ + userDataDir: await getRandomTemporaryDirectory() + }) + const page = await browser.newPage() + const blocker = await PuppeteerBlocker.fromPrebuiltAdsOnly(fetch) + + blocker.enableBlockingInPage(page) + await page.goto(await pendingServerUrl) + await page.goto(await pendingServerUrl) + await browser.close() + + expect(serverRequestCount).to.equal(4) + }) + + it(`can be enabled, which causes the browser to cache resources`, async () => { + let serverRequestCount = 0 + serverRequestEvents.on(requestReceivedEventName, () => serverRequestCount += 1) + + const browser = await puppeteer.launch({ + userDataDir: await getRandomTemporaryDirectory() + }) + const page = await browser.newPage() + const blocker = await PuppeteerBlocker.fromPrebuiltAdsOnly(fetch) + + blocker.enableBlockingInPage(page, { cacheSafe: true }) + await page.goto(await pendingServerUrl) + await page.goto(await pendingServerUrl) + await browser.close() + + expect(serverRequestCount).to.equal(3) + }) +})