-
Notifications
You must be signed in to change notification settings - Fork 47
/
info.go
65 lines (56 loc) · 1.34 KB
/
info.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
package html2article
import (
"math"
"golang.org/x/net/html"
)
// Info holds the statistics gathered for a single HTML node together with
// the score derived from them by CalScore.
//
// Of these fields, the methods visible in this file read LeafList
// (getAvg) and DensitySum, Pcount and Data (CalScore), and write avg and
// score (CalScore). The remaining fields are not touched here; their
// descriptions below are inferred from their names and should be confirmed
// against the code that fills them in.
type Info struct {
// presumably the amount of text found under the node — TODO confirm unit
TextCount int
// presumably the amount of that text that sits inside links — TODO confirm
LinkTextCount int
// presumably the number of tags under the node — TODO confirm
TagCount int
// presumably the number of link tags under the node — TODO confirm
LinkTagCount int
// Integer samples whose spread getAvg measures (presumably one entry per
// leaf under the node — TODO confirm what each value represents).
LeafList []int
// presumably this node's own text density — TODO confirm formula
Density float64
// First factor of the score computed by CalScore.
DensitySum float64
// Enters the score as log10(Pcount + 2); presumably a count of <p>
// elements — TODO confirm.
Pcount int
// presumably the number of input elements under the node — TODO confirm
InputCount int
// presumably the number of image elements under the node — TODO confirm
ImageCount int
// Text passed to countSn and countStopWords by CalScore.
Data string
// Cached result of getAvg, set by CalScore.
avg float64
// Final score, set by CalScore.
score float64
// The HTML node these statistics belong to (not dereferenced in this file).
node *html.Node
}
// NewInfo allocates an Info with every field at its zero value and returns
// a pointer to it.
func NewInfo() *Info {
	return new(Info)
}
// CalScore computes the node's score and stores it in info.score. As a side
// effect it caches the result of getAvg in info.avg.
//
// The score is the product, in this order, of:
//   - info.DensitySum
//   - the natural logarithm of info.avg
//   - log10(info.Pcount + 2)
//   - countSn(info.Data)/sn_sum + 2
//   - countStopWords(info.Data)/swn_sum + 2
//
// sn_sum and swn_sum are the denominators of the two ratios (presumably the
// totals over all candidate nodes — confirm against the caller). A zero
// denominator makes the corresponding ratio 0 rather than dividing by zero,
// which would otherwise turn the score into NaN or an infinity and make it
// useless for comparisons.
//
// NOTE(review): when info.LeafList is empty getAvg returns 0, math.Log(0) is
// -Inf, and the resulting score is not finite. That behavior is unchanged.
func (info *Info) CalScore(sn_sum, swn_sum float64) {
	// share returns count/total, or 0 when there is nothing to divide by.
	share := func(count, total float64) float64 {
		if total == 0 {
			return 0
		}
		return count / total
	}

	sn := countSn(info.Data)
	swn := countStopWords(info.Data)
	info.avg = info.getAvg()
	info.score = info.DensitySum *
		math.Log(info.avg) *
		math.Log10(float64(info.Pcount+2)) *
		(share(float64(sn), sn_sum) + 2) *
		(share(float64(swn), swn_sum) + 2)
}
// getAvg returns sqrt(v + 1), where v is the population variance of the
// values in info.LeafList (the mean of the squared deviations from their
// arithmetic mean). An empty LeafList yields 0.
//
// Despite its name the result measures spread, not an average.
func (info *Info) getAvg() float64 {
	leaves := info.LeafList
	n := len(leaves)
	if n == 0 {
		return 0
	}

	// The total is accumulated as an int and converted once.
	total := 0
	for i := 0; i < n; i++ {
		total += leaves[i]
	}
	count := float64(n)
	mean := float64(total) / count

	squares := 0.0
	for i := range leaves {
		dev := mean - float64(leaves[i])
		squares += dev * dev
	}

	// The +1 keeps the radicand at 1 or more, so a non-empty list never
	// produces a result below 1.
	return math.Sqrt(squares/count + 1.0)
}