/
html_sax_push_parser.c
93 lines (75 loc) · 2.38 KB
/
html_sax_push_parser.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <html_sax_push_parser.h>
/*
* call-seq:
* native_write(chunk, last_chunk)
*
* Write +chunk+ to the PushParser. Passing +true+ as +last_chunk+ triggers the end_document handler.
*/
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
  libxmlStructuredErrorHandlerState saved_handlers;
  xmlParserCtxtPtr parser;
  const char *bytes = NULL;
  int nbytes = 0;
  int is_final;
  int rc;

  /* The libxml2 parser context is the data pointer wrapped by +self+. */
  Data_Get_Struct(self, xmlParserCtxt, parser);

  /* A nil chunk is handed to libxml2 as a NULL buffer of length zero. */
  if (_chunk != Qnil) {
    bytes = StringValuePtr(_chunk);
    nbytes = (int)RSTRING_LEN(_chunk);
  }

  /* Only a literal +true+ marks this as the terminating chunk. */
  is_final = (_last_chunk == Qtrue) ? 1 : 0;

  /*
   * Install a NULL structured error handler around the parse call and put
   * the previous one back afterwards.
   * NOTE(review): presumably this keeps a globally installed handler from
   * firing during the parse -- confirm against the helper's definition.
   */
  Nokogiri_structured_error_func_save_and_set(&saved_handlers, NULL, NULL);
  rc = htmlParseChunk(parser, bytes, nbytes, is_final);
  Nokogiri_structured_error_func_restore(&saved_handlers);

  /* Zero means libxml2 reported no error for this chunk. */
  if (rc == 0) {
    return self;
  }

  /* With XML_PARSE_RECOVER set, parse errors are not raised. */
  if (parser->options & XML_PARSE_RECOVER) {
    return self;
  }

  // TODO: there appear to be no tests for this block
  {
    xmlErrorPtr last_error = xmlCtxtGetLastError(parser);
    Nokogiri_error_raise(NULL, last_error);
  }

  return self;
}
/*
* call-seq:
* initialize_native(xml_sax, filename, encoding)
*
* Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
*/
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
                               VALUE encoding)
{
  htmlSAXHandlerPtr handler;
  htmlParserCtxtPtr parser;
  const char *path = NULL;
  xmlCharEncoding charset = XML_CHAR_ENCODING_NONE;

  /* Unwrap the SAX handler struct carried by the +_xml_sax+ object. */
  Data_Get_Struct(_xml_sax, xmlSAXHandler, handler);

  /* A nil filename leaves the path NULL. */
  if (_filename != Qnil) {
    path = StringValueCStr(_filename);
  }

  /* A nil encoding keeps XML_CHAR_ENCODING_NONE; an unknown name is rejected. */
  if (!NIL_P(encoding)) {
    charset = xmlParseCharEncoding(StringValueCStr(encoding));
    if (charset == XML_CHAR_ENCODING_ERROR) {
      rb_raise(rb_eArgError, "Unsupported Encoding");
    }
  }

  /* No user data and no initial chunk: input arrives later via native_write. */
  parser = htmlCreatePushParserCtxt(handler, NULL, NULL, 0, path, charset);
  if (parser == NULL) {
    rb_raise(rb_eRuntimeError, "Could not create a parser context");
  }

  /* Give the SAX callbacks a tuple of (context, self) as their user data. */
  parser->userData = NOKOGIRI_SAX_TUPLE_NEW(parser, self);
  parser->sax2 = 1;

  /* Store the context on +self+ so native_write can retrieve it. */
  DATA_PTR(self) = parser;

  return self;
}
/* Ruby class object for Nokogiri::HTML::SAX::PushParser; assigned in init_html_sax_push_parser(). */
VALUE cNokogiriHtmlSaxPushParser;
/*
 * Define Nokogiri::HTML::SAX::PushParser as a subclass of the XML SAX push
 * parser class and register its private native methods.
 */
void init_html_sax_push_parser()
{
  VALUE nokogiri_mod = rb_define_module("Nokogiri");
  VALUE html_mod = rb_define_module_under(nokogiri_mod, "HTML");
  VALUE sax_mod = rb_define_module_under(html_mod, "SAX");

  cNokogiriHtmlSaxPushParser =
    rb_define_class_under(sax_mod, "PushParser", cNokogiriXmlSaxPushParser);

  /* initialize_native(xml_sax, filename, encoding) */
  rb_define_private_method(cNokogiriHtmlSaxPushParser, "initialize_native",
                           initialize_native, 3);

  /* native_write(chunk, last_chunk) */
  rb_define_private_method(cNokogiriHtmlSaxPushParser, "native_write",
                           native_write, 2);
}