From 18cfb94c3eb4de7506a318ee6cbc701236f83388 Mon Sep 17 00:00:00 2001 From: Artem Sapegin Date: Wed, 22 Jun 2022 09:41:07 +0200 Subject: [PATCH] fix: Improve matching of words near punctuation Fixes #28 --- index.js | 5 +++-- test.js | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/index.js b/index.js index b22aee4..4fd6180 100644 --- a/index.js +++ b/index.js @@ -133,11 +133,12 @@ function readTermsFile(filepath) { * @param {string} pattern */ function getExactMatchRegExp(pattern) { + const punctuation = '[\\.,;\'")]'; return new RegExp( // 1. Beginning of the string, or any character that isn't "-" or alphanumeric // 2. Exact match of the pattern - // 3. Space, ". ", "." at the end of the string, end of the string - `(?<=^|[^-\\w])\\b${pattern}\\b(?= |\\. |\\.$|$)`, + // 3. Space, punctuation + space, punctuation + punctuation, or punctuation at the end of the string, end of the string + `(?<=^|[^-\\w])\\b${pattern}\\b(?= |${punctuation} |${punctuation}${punctuation}|${punctuation}$|$)`, 'ig' ); } diff --git a/test.js b/test.js index 47b49e0..a94c039 100644 --- a/test.js +++ b/test.js @@ -119,6 +119,19 @@ describe('getMultipleWordRegExp', () => { expect(result[0]).toBe('javascript'); }); + it.each([ + ['Bad Javascript. Is it bad?'], + ['Bad Javascript, is it bad?'], + ['Bad Javascript; is it bad?'], + ['Bad (Javascript) is it bad?'], + ['Bad "Javascript" is it bad?'], + ["Bad 'Javascript' is it bad?"], + ['Bad "Javascript", is it bad?'], + ])('should match a pattern regardless of punctuation: %s', string => { + const result = getMultipleWordRegExp(variants).exec(string); + expect(result).toBeTruthy(); + }); + it('should not match a pattern in as a part of a file name', () => { const result = getMultipleWordRegExp(variants).exec('javascript.md'); expect(result).toBeFalsy(); @@ -148,6 +161,19 @@ describe('getExactMatchRegExp', () => { expect(regexp.test('Webpack')).toBeTruthy(); }); + it.each([ + ['Javascript.'], + ['Javascript,'], + ['Javascript;'], + ['(Javascript)'], + ['"Javascript"'], + ["'Javascript'"], + ['"Javascript",'], + ])('should match a pattern regardless of punctuation: %s', string => { + const regexp = getExactMatchRegExp('javascript'); + expect(regexp.test(string)).toBeTruthy(); + }); + it('returned RegExp should not match in the middle of the word', () => { const regexp = getExactMatchRegExp('webpack'); expect(regexp.test(`FooWebpack`)).toBeFalsy();