-
-
Notifications
You must be signed in to change notification settings - Fork 897
/
nokogiri.rb
144 lines (130 loc) · 4.11 KB
/
nokogiri.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# -*- coding: utf-8 -*-
# frozen_string_literal: true
# Load-time bootstrap: pull in platform dependencies, then the compiled
# extension.
#
# NOTE(review): this comment used to say "Modify the PATH on windows so that
# the external DLLs will get loaded", but nothing visible in this file touches
# PATH. rbconfig is still required here -- presumably for code loaded later;
# confirm before removing it.
require 'rbconfig'
# On JRuby, load the JRuby-specific dependencies before the extension itself.
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
require 'nokogiri/jruby/dependencies'
end
# First try the extension under a directory named after the running Ruby's
# "major.minor" version (e.g. "nokogiri/2.7/nokogiri" for RUBY_VERSION 2.7.x);
# presumably that is where precompiled native gems place it -- TODO confirm.
begin
# The match is done only for its side effect of setting $1 to "major.minor".
RUBY_VERSION =~ /(\d+\.\d+)/
require "nokogiri/#{$1}/nokogiri"
rescue LoadError => e
# A LoadError whose message mentions GLIBC is reported with installation
# advice and then re-raised, so the fallback require below is not attempted.
if e.message =~ /GLIBC/
warn <<~EOM
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system with glibc < 2.17:
#{e.message}
If that's the case, then please install Nokogiri via the `ruby` platform gem:
gem install nokogiri --platform=ruby
or:
bundle config set force_ruby_platform true
Please visit https://nokogiri.org/tutorials/installing_nokogiri.html for more help.
EOM
raise e
end
# Any other LoadError: fall back to the unversioned extension path. A
# LoadError raised by this require is not rescued and propagates to the caller.
require 'nokogiri/nokogiri'
end
require 'nokogiri/version'
require 'nokogiri/syntax_error'
require 'nokogiri/xml'
require 'nokogiri/xslt'
require 'nokogiri/html'
require 'nokogiri/decorators/slop'
require 'nokogiri/css'
require 'nokogiri/html/builder'
# Nokogiri parses and searches XML/HTML very quickly, and also has
# correctly implemented CSS3 selector support as well as XPath 1.0
# support.
#
# Parsing a document returns either a Nokogiri::XML::Document, or a
# Nokogiri::HTML::Document depending on the kind of document you parse.
#
# Here is an example:
#
# require 'nokogiri'
# require 'open-uri'
#
# # Get a Nokogiri::HTML::Document for the page we’re interested in...
#
# doc = Nokogiri::HTML(URI.open('http://www.google.com/search?q=tenderlove'))
#
# # Do funky things with it using Nokogiri::XML::Node methods...
#
# ####
# # Search for nodes by css
# doc.css('h3.r a.l').each do |link|
# puts link.content
# end
#
# See Nokogiri::XML::Searchable#css for more information about CSS searching.
# See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
module Nokogiri
  class << self
    ###
    # Parse an HTML or XML document, guessing the type from +string+.
    #
    # +string+ is treated as HTML when it responds to +read+, or when an
    # <html> tag (optionally written as <!DOCTYPE html ...>) starts a line
    # within its first 512 characters. Anything else is parsed as XML.
    #
    # +url+ and +encoding+ are passed through to Nokogiri.HTML / Nokogiri.XML
    # unchanged. When +options+ is nil, XML::ParseOptions::DEFAULT_HTML or
    # XML::ParseOptions::DEFAULT_XML is used, matching the chosen parser.
    #
    # If a block is given it is yielded the parsed document. The document is
    # returned in either case.
    def parse(string, url = nil, encoding = nil, options = nil)
      # Expect an HTML indicator to appear within the first 512
      # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
      # shouldn't be that long)
      html_like = string.respond_to?(:read) ||
        /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]

      document =
        if html_like
          Nokogiri.HTML(string, url, encoding, options || XML::ParseOptions::DEFAULT_HTML)
        else
          Nokogiri.XML(string, url, encoding, options || XML::ParseOptions::DEFAULT_XML)
        end

      yield document if block_given?
      document
    end

    ###
    # Build a node from markup or from a builder block.
    #
    # When +input+ is given, it is parsed with Nokogiri::HTML.fragment and the
    # first child of that fragment is returned. Otherwise +blk+ is handed to
    # the top-level Nokogiri() method.
    #
    # +opts+ is accepted but is not used by this method.
    def make(input = nil, opts = {}, &blk)
      return Nokogiri(&blk) unless input

      fragment = Nokogiri::HTML.fragment(input)
      fragment.children.first
    end

    ###
    # Parse a document and add the Slop decorator. The Slop decorator
    # implements method_missing such that methods may be used instead of CSS
    # or XPath. For example:
    #
    #   doc = Nokogiri::Slop(<<-eohtml)
    #     <html>
    #       <body>
    #         <p>first</p>
    #         <p>second</p>
    #       </body>
    #     </html>
    #   eohtml
    #   assert_equal('second', doc.html.body.p[1].text)
    #
    # All arguments (and any block) are forwarded to the top-level Nokogiri()
    # method; +slop!+ is then called on its result and that value is returned.
    def Slop(*args, &block)
      document = Nokogiri(*args, &block)
      document.slop!
    end

    ###
    # Register encoding aliases with EncodingHandler.
    #
    # Make sure to support some popular encoding aliases not known by
    # all iconv implementations. An alias is only registered when
    # EncodingHandler has no entry for it yet.
    def install_default_aliases
      default_aliases = {
        'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
      }

      default_aliases.each do |alias_name, name|
        next unless EncodingHandler[alias_name].nil?

        EncodingHandler.alias(name, alias_name)
      end
    end
  end

  # Runs once, when this file is loaded.
  Nokogiri.install_default_aliases
end
###
# Top-level convenience entry point.
#
# Without a block, +args+ are forwarded unchanged to Nokogiri.parse, which
# tries to guess what type of document you are attempting to parse. For more
# information, see Nokogiri.parse
#
# With a block, the block is handed to Nokogiri::HTML::Builder and the root
# of the document it builds is returned; +args+ are ignored in that case.
#
# To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
def Nokogiri(*args, &block)
  return Nokogiri.parse(*args) unless block

  builder = Nokogiri::HTML::Builder.new(&block)
  builder.doc.root
end