Skip to content

Commit

Permalink
XSS: Fix sanitizeUrl vbscript/data xss
Browse files Browse the repository at this point in the history
I believe this fixes https://www.npmjs.com/advisories/1219 if
`options.disableParsingRawHTML` is set.

NOTE: This does not handle script elements, etc., that may be rendered
when `options.disableParsingRawHTML` is not enabled. We might be able to
use something like [`dompurify`](https://github.com/cure53/DOMPurify) to
solve that case?

According to https://owasp.org/www-community/xss-filter-evasion-cheatsheet ,
the dangerous `javascript:` protocol can contain some whitespace
characters and still be vulnerable, and sometimes when used in
conjunction with images, some other special characters like ` or <
before the javascript: protocol can also leave a url vulnerable.

This change re-adds the sanitiation logic removed in 9c6c782 , and also
adds the vbscript/data handling from github.com/ariabuckles/simple-markdown/pull/63

Test plan:

Add tests and run `npm test`
  • Loading branch information
ariabuckles committed May 22, 2020
1 parent 8dfbc86 commit 105d6a6
Show file tree
Hide file tree
Showing 2 changed files with 187 additions and 36 deletions.
216 changes: 183 additions & 33 deletions index.compiler.spec.js
Expand Up @@ -925,6 +925,48 @@ describe('links', () => {
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});

render(compiler('![foo](javascript:doSomethingBad)'));

expect(root.innerHTML).toMatchInlineSnapshot(`
<img data-reactroot
alt="foo"
>
`);

expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing Data expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('[foo](data:doSomethingBad)'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing VBScript expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('[foo](vbScript:doSomethingBad)'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing encoded JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});

Expand Down Expand Up @@ -957,6 +999,60 @@ describe('links', () => {
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing padded encoded vscript expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});

render(compiler('[foo]( VBScript%3AdoSomethingBad)'));

expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown images containing padded encoded vscript expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('![foo]( VBScript%3AdoSomethingBad)'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<img data-reactroot
alt="foo"
>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing padded encoded data expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('[foo](`<data:doSomethingBad)'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown images containing padded encoded data expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('![foo](`<data:doSomethingBad)'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<img data-reactroot
alt="foo"
>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize markdown links containing invalid characters', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});

Expand All @@ -973,20 +1069,65 @@ describe('links', () => {
});

it('should sanitize html links containing JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
jest.spyOn(console, 'warn').mockImplementation(() => {});

render(compiler('<a href="javascript:doSomethingBad">foo</a>'));

expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);

render(compiler('<a href="javascript:doSomethingBad">foo</a>'));
expect(console.warn).toHaveBeenCalled();
});

expect(root.innerHTML).toMatchInlineSnapshot(`
it('should sanitize html links containing encoded, prefixed data expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('<a href="<`data:doSomethingBad">foo</a>'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<a data-reactroot>
foo
</a>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize html images containing encoded, prefixed JS expressions', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});

// TODO: something is off on parsing here, because this prints:
// console.error("Warning: Unknown prop `javascript:alert` on <img> tag"...)
// Which it shouldn't
render(compiler('<img src="`<javascript:alert>`(\'alertstr\')"'));
expect(root.innerHTML).toMatchInlineSnapshot(`
expect(console.warn).toHaveBeenCalled();
});
<span data-reactroot>
<img src="true">
\`('alertstr')"
</span>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should sanitize html images containing weird parsing src=s', () => {
jest.spyOn(console, 'warn').mockImplementation(() => {});
render(compiler('<img src="`<src="javascript:alert(`xss`)">`'));
expect(root.innerHTML).toMatchInlineSnapshot(`
<span data-reactroot>
<img src="\`<src=">
\`
</span>
`);
expect(console.warn).toHaveBeenCalled();
});

it('should handle a link with a URL in the text', () => {
render(
Expand Down Expand Up @@ -1582,14 +1723,14 @@ describe('GFM tables', () => {

it('#241 should not ignore the first cell when its contents is empty', () => {
render(
compiler(
[
'| Foo | Bar | Baz |',
'| --- | --- | --- |',
'| | 2 | 3 |',
'| | 5 | 6 |',
].join('\n')
)
compiler(
[
'| Foo | Bar | Baz |',
'| --- | --- | --- |',
'| | 2 | 3 |',
'| | 5 | 6 |',
].join('\n')
)
);

expect(root.innerHTML).toMatchInlineSnapshot(`
Expand Down Expand Up @@ -1780,7 +1921,6 @@ describe('GFM tables', () => {
`);
});

});

describe('arbitrary HTML', () => {
Expand Down Expand Up @@ -1976,10 +2116,12 @@ describe('arbitrary HTML', () => {
});

it('throws out multiline HTML comments', () => {
render(compiler(`Foo\n<!-- this is
render(
compiler(`Foo\n<!-- this is
a
multiline
comment -->`));
comment -->`)
);

expect(root.innerHTML).toMatchInlineSnapshot(`
Expand Down Expand Up @@ -2800,7 +2942,7 @@ fun main() {
`);
});
it('should not fail with lots of \\n in the middle of the text', () => {
it('should not fail with lots of \\n in the middle of the text', () => {
render(
compiler(
'Text\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\ntext',
Expand All @@ -2825,12 +2967,9 @@ fun main() {

it('should not render html if disableParsingRawHTML is true', () => {
render(
compiler(
'Text with <span>html</span> inside',
{
disableParsingRawHTML: true
}
)
compiler('Text with <span>html</span> inside', {
disableParsingRawHTML: true,
})
);
expect(root.innerHTML).toMatchInlineSnapshot(`
Expand All @@ -2843,12 +2982,9 @@ fun main() {

it('should render html if disableParsingRawHTML is false', () => {
render(
compiler(
'Text with <span>html</span> inside',
{
disableParsingRawHTML: false
}
)
compiler('Text with <span>html</span> inside', {
disableParsingRawHTML: false,
})
);
expect(root.innerHTML).toMatchInlineSnapshot(`
Expand Down Expand Up @@ -2976,7 +3112,15 @@ describe('footnotes', () => {
});

it('should handle complex references', () => {
render(compiler(['foo[^referenc茅 her茅 123] bar', '', '[^referenc茅 her茅 123]: Baz baz'].join('\n')));
render(
compiler(
[
'foo[^referenc茅 her茅 123] bar',
'',
'[^referenc茅 her茅 123]: Baz baz',
].join('\n')
)
);

expect(root.innerHTML).toMatchInlineSnapshot(`
Expand All @@ -3002,7 +3146,13 @@ describe('footnotes', () => {
});

it('should handle conversion of multiple references into links', () => {
render(compiler(['foo[^abc] bar. baz[^def]', '', '[^abc]: Baz baz', '[^def]: Def'].join('\n')));
render(
compiler(
['foo[^abc] bar. baz[^def]', '', '[^abc]: Baz baz', '[^def]: Def'].join(
'\n'
)
)
);

expect(root.innerHTML).toMatchInlineSnapshot(`
Expand Down Expand Up @@ -3410,7 +3560,7 @@ describe('overrides', () => {
render(
compiler('Hello.\n\n', {
overrides: { p: { component: FakeParagraph } },
options: { disableParsingRawHTML: true }
options: { disableParsingRawHTML: true },
})
);

Expand All @@ -3424,7 +3574,7 @@ describe('overrides', () => {
render(
compiler('Hello.\n\n<FakeSpan>I am a fake span</FakeSpan>', {
overrides: { FakeSpan },
options: { disableParsingRawHTML: true }
options: { disableParsingRawHTML: true },
})
);

Expand Down
7 changes: 4 additions & 3 deletions index.js
Expand Up @@ -586,12 +586,13 @@ function reactFor(outputFunc) {

function sanitizeUrl(url) {
try {
const decoded = decodeURIComponent(url);
const decoded = decodeURIComponent(url)
.replace(/[^A-Za-z0-9/:]/g, '');

if (decoded.match(/^\s*javascript:/i)) {
if (decoded.match(/^\s*(javascript|vbscript|data):/i)) {
if (process.env.NODE_ENV !== 'production') {
console.warn(
'Anchor URL contains an unsafe JavaScript expression, it will not be rendered.',
'Anchor URL contains an unsafe JavaScript/VBScript/data expression, it will not be rendered.',
decoded
);
}
Expand Down

0 comments on commit 105d6a6

Please sign in to comment.