-
Notifications
You must be signed in to change notification settings - Fork 5
/
index.js
100 lines (93 loc) · 3.57 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
'use strict'
/**
* Extracted from https://github.com/simbo/metalsmith-better-excerpts
* (published under MIT license)
*/
const cheerio = require('cheerio')
const unescapeHTML = require('he').unescape
const stripTags = require('striptags')
const truncate = require('lodash.truncate')
/**
 * Retrieve an excerpt by taking everything that precedes a 'more' tag.
 *
 * The markup is parsed and re-serialised through cheerio before it is searched.
 *
 * @param {string} html HTML markup to search
 * @param {RegExp} regExp pattern that marks the end of the excerpt
 * @return {string|undefined} the markup before the first match, with HTML
 *   entities unescaped, or undefined when the pattern does not match
 */
function getExcerptByMoreTag (html, regExp) {
  const markup = cheerio.load('<root>' + html + '</root>')('root').html()
  const matchIndex = markup.search(regExp)
  if (matchIndex === -1) {
    return undefined
  }
  // `search` yields a UTF-16 string index, which is what `slice` expects.
  // Converting the prefix to a byte length first would over-read into the
  // 'more' tag as soon as the prefix contains a non-ASCII character.
  return unescapeHTML(markup.slice(0, matchIndex))
}
/**
 * Retrieve an excerpt from the first <p> element that contains text.
 *
 * Paragraphs whose text is empty or whitespace-only are skipped. When no
 * paragraph qualifies, the whole input is used as the excerpt.
 *
 * @param {string} html HTML markup to search
 * @return {string} the trimmed inner HTML of the selected paragraph (or the
 *   untouched input as fallback), with HTML entities unescaped
 */
function getExcerptByFirstParagraph (html) {
  const $ = cheerio.load(html)
  const hasText = (_index, element) => $(element).text().trim().length > 0
  const firstParagraph = $('p').filter(hasText).first()

  if (firstParagraph.length === 0) {
    // No usable paragraph: fall back to the complete markup
    return unescapeHTML(html)
  }
  return unescapeHTML(firstParagraph.html().trim())
}
/**
 * Turn an already extracted excerpt into plain text and optionally shorten it.
 *
 * @param {string} excerpt Already extracted excerpt
 * @param {Object} options stripping options
 * @param {number} [options.pruneLength] maximum length; 140 when not a number,
 *   and no truncation at all when the value is not greater than zero
 * @param {string} [options.pruneSeparator] truncation separator; ' ' when not a string
 * @param {string} [options.pruneString] omission marker; '…' when not a string
 * @return {string} The stripped and pruned excerpt
 */
function stripTagsFromExcerpt (excerpt, options) {
  // Drop markup, trim both ends, and squeeze every whitespace run down to its last character
  const plainText = stripTags(excerpt).replace(/^\s+|\s+$|\s+(?=\s)/g, '')

  let maxLength = 140
  if (typeof options.pruneLength === 'number') {
    maxLength = options.pruneLength
  }
  // Negated comparison on purpose: NaN must skip truncation as well
  if (!(maxLength > 0)) {
    return plainText
  }

  let ellipsis = '…'
  if (typeof options.pruneString === 'string') {
    ellipsis = options.pruneString
  }
  let boundary = ' '
  if (typeof options.pruneSeparator === 'string') {
    boundary = options.pruneSeparator
  }

  return truncate(plainText, {
    length: maxLength,
    omission: ellipsis,
    separator: boundary
  })
}
/**
 * Extracts the raw excerpt (tags still included) from the html.
 *
 * The 'more' tag strategy is tried first; when it yields nothing (no match or
 * an empty excerpt) the first-paragraph strategy is used instead.
 *
 * @param {string} html Html string to look for the excerpt
 * @param {RegExp} [moreRegExp=/\s*<!--\s*more\s*-->/i] RegExp marking the end of the excerpt
 * @return {string} The excerpt preceding the more tag if there is one, otherwise
 *   the excerpt taken from the first paragraph
 */
function getRawExcerpt (html, moreRegExp) {
  const endOfExcerpt = moreRegExp || /\s*<!--\s*more\s*-->/i
  const upToMoreTag = getExcerptByMoreTag(html, endOfExcerpt)
  if (upToMoreTag) {
    return upToMoreTag
  }
  return getExcerptByFirstParagraph(html)
}
/**
* Parses the excerpt for a given html string.
*
* @param {string} html Html code to parse for the excerpt.
* @param {Object} [options] Options for parsing.
* @param {RegExp} [options.moreRegExp=/\s*<!--\s*more\s*-->/i] Regexp to look for the end of the excerpt. If this is not found
* @param {boolean} [options.stripTags=true] Strip the tags from the html code when getting the excerpt.
* @param {number} [options.pruneLength=140] Maximum size of the excerpt (only functional if stripTags=true)
* @param {string} [options.pruneSeparator=' '] Character to look for when truncating a text
* @param {string} [options.pruneString='…'] String to be attached if pruning needs to happen
* @returns {string} The excerpt found in the given html code.
*/
module.exports = function excerptHtml (html, options) {
if (!options) {
options = {}
}
const rawExcerpt = getRawExcerpt(html, options.moreRegExp)
if (options.stripTags === false) {
return rawExcerpt
}
return stripTagsFromExcerpt(rawExcerpt, options)
}