Skip to content

Commit

Permalink
Properly parse meta tag when fields are reversed
Browse files Browse the repository at this point in the history
  • Loading branch information
isaacs committed Oct 18, 2019
1 parent 64f6d63 commit 2dc94ae
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lib/body.js
Expand Up @@ -293,6 +293,12 @@ const convertBody = (buffer, headers) => {
// HTML4-style charset sniffing: look for a <meta http-equiv="content-type">
// tag in `str`. Only attempted while `res` is still falsy and `str` is truthy.
if (!res && str) {
// Attribute order: http-equiv first, then content.
// Groups: 1 = quote, 2 = quote, 3 = content value (last element of the match).
res = /<meta[\s]+?http-equiv=(['"])content-type\1[\s]+?content=(['"])(.+?)\2/i.exec(str)

if (!res) {
// Same tag with the attributes reversed: content first, then http-equiv.
// Groups: 1 = quote, 2 = content value, 3 = quote.
res = /<meta[\s]+?content=(['"])(.+?)\1[\s]+?http-equiv=(['"])content-type\3/i.exec(str)
if (res)
res.pop() // drop the trailing quote group so the content value is the last element, as in the first pattern
}

// Extract whatever follows "charset=" from the content value, which is the
// last element of the match array at this point.
if (res)
res = /charset=(.*)/i.exec(res.pop())
}
Expand Down
7 changes: 7 additions & 0 deletions test/body.js
Expand Up @@ -307,6 +307,13 @@ t.test('convert body', t => {
t.equal(await b.textConverted(), s)
})

// The meta tag here lists `content` before `http-equiv`. A Body built from the
// string passed through convert(s, 'gbk') must give back the original string
// from textConverted().
t.test('html4 meta tag reversed', async t => {
const s = '<meta content="text/html; charset=gbk" http-equiv="Content-Type"><div>中文L</div>'
const b = new Body(convert(s, 'gbk'))
t.equal(await b.textConverted(), s)
})


t.test('html5 meta tag', async t => {
const s = '<meta charset="gbk"><div>中文</div>'
const b = new Body(convert(s, 'gbk'))
Expand Down
6 changes: 6 additions & 0 deletions test/fixtures/server.js
Expand Up @@ -164,6 +164,12 @@ class TestServer {
res.end(convert('<meta http-equiv="Content-Type" content="text/html; charset=gb2312"><div>中文</div>', 'gb2312'))
}

// Serves an HTML document passed through convert(..., 'gb2312') whose meta tag
// lists `content` before `http-equiv`. The Content-Type header carries no
// charset, so the charset is only declared in the meta tag.
if (p === '/encoding/gb2312-reverse') {
res.statusCode = 200;
res.setHeader('Content-Type', 'text/html');
res.end(convert('<meta content="text/html; charset=gb2312" http-equiv="Content-Type"><div>中文</div>', 'gb2312'));
}

if (p === '/encoding/shift-jis') {
res.statusCode = 200
res.setHeader('Content-Type', 'text/html; charset=Shift-JIS')
Expand Down
8 changes: 8 additions & 0 deletions test/index.js
Expand Up @@ -1767,6 +1767,14 @@ t.test('with optional `encoding`', t => {
})
}))

// Fetches the encoding/gb2312-reverse route and expects textConverted() to
// return the original markup, where the meta tag lists `content` before
// `http-equiv`.
t.test('encoding decode, html4 detect reverse http-equiv', t =>
fetch(`${base}encoding/gb2312-reverse`).then(res => {
t.equal(res.status, 200)
return res.textConverted().then(result => {
t.equal(result, '<meta content="text/html; charset=gb2312" http-equiv="Content-Type"><div>中文</div>')
})
}))

t.test('default to utf8 encoding', t =>
fetch(`${base}encoding/utf8`).then(res => {
t.equal(res.status, 200)
Expand Down

0 comments on commit 2dc94ae

Please sign in to comment.