-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
64 lines (52 loc) · 1.46 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
'use strict';
const { URL } = require('url');
const Microformats = require('microformat-node');
// Version details exposed by this module: this package's own version (read
// from package.json) together with the version and living-standard values
// reported by the microformat-node library.
const packageInfo = require('./package.json');
const versions = {
  version: packageInfo.version,
  microformatsVersion: Microformats.version,
  livingStandard: Microformats.livingStandard,
};
/**
 * Extractor that runs the document's HTML through microformat-node and
 * stores the outcome on `data`.
 *
 * @param {Function} $ - Document handle; only its `html()` method is used here.
 * @param {Object} data - Accumulated extraction result; `data.baseUrl` is
 *   forwarded to the microformats parser.
 * @returns {Promise<Object>} Resolves with the same `data` object, extended
 *   with `microformats` (the parser output) and `microformatsVersion` (the
 *   module-level `versions` object).
 */
const extractMicroformats = function ($, data) {
  const parseOptions = {
    html: $.html(),
    // TODO: Add support for h-feed? h-event? h-item?
    filters: ['h-entry'],
    baseUrl: data.baseUrl,
    dateFormat: 'w3c',
  };

  return Microformats.getAsync(parseOptions).then((mfData) => {
    data.microformats = mfData;
    data.microformatsVersion = versions;
    return data;
  });
};
/**
 * Extractor that collects the distinct link targets of every `<a>` element.
 *
 * Each non-empty `href` attribute is resolved against `data.baseUrl` and
 * serialized; duplicates are dropped while first-seen order is kept.
 *
 * @param {Function} $ - Document query function; `$('a')` must return a
 *   collection exposing `length` and `eq(index).attr(name)`.
 * @param {Object} data - Accumulated extraction result; `data.baseUrl` is
 *   used as the base for resolving relative links.
 * @returns {Object} The same `data` object with `hrefs` set to an array of
 *   absolute URL strings.
 */
const extractHrefs = function ($, data) {
  // TODO: Extract from mf2 data instead – first extract a feed, then links for each feed item?
  const links = $('a');
  // A Set only ever yields what was added to it, unlike a plain object read
  // back with `for...in`, which also reports enumerable inherited keys.
  const uniqueHrefs = new Set();

  for (let i = 0, length = links.length; i < length; i += 1) {
    const href = links.eq(i).attr('href');

    if (!href) {
      continue;
    }

    try {
      uniqueHrefs.add(new URL(href, data.baseUrl).toString());
    } catch (err) {
      // Deliberate best effort: an href that cannot be resolved into a valid
      // URL is skipped so one bad link does not abort the whole extraction.
    }
  }

  data.hrefs = Array.from(uniqueHrefs);

  return data;
};
/**
 * Configures a parser instance with this module's extractors.
 *
 * Removes the extractor registered as 'headers', then registers the
 * 'microformats' and 'hrefs' extractors, in that order.
 *
 * @param {Object} parserInstance - Object exposing `removeExtractor(name)`
 *   and `addExtractor(name, extractor)`.
 * @returns {Object} The same `parserInstance` that was passed in.
 */
const addToParser = function (parserInstance) {
  parserInstance.removeExtractor('headers');

  const extractorsToAdd = [
    ['microformats', extractMicroformats],
    ['hrefs', extractHrefs],
  ];

  for (const [extractorName, extractor] of extractorsToAdd) {
    parserInstance.addExtractor(extractorName, extractor);
  }

  return parserInstance;
};
// Public API: the parser setup helper, the two individual extractors (so they
// can also be registered or called separately), and the version details.
module.exports = {
addToParser,
extractMicroformats,
extractHrefs,
versions
};