diff --git a/ext/java/nokogiri/Html4SaxPushParser.java b/ext/java/nokogiri/Html4SaxPushParser.java index d9aa6959e6..8ff7088b6c 100644 --- a/ext/java/nokogiri/Html4SaxPushParser.java +++ b/ext/java/nokogiri/Html4SaxPushParser.java @@ -1,31 +1,26 @@ package nokogiri; -import static nokogiri.XmlSaxPushParser.terminateExecution; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.IOException; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - -import nokogiri.internals.*; - +import nokogiri.internals.ClosedStreamException; +import nokogiri.internals.NokogiriBlockingQueueInputStream; +import nokogiri.internals.NokogiriHelpers; +import nokogiri.internals.ParserContext; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyObject; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; -import org.jruby.exceptions.RaiseException; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.*; + +import static nokogiri.XmlSaxPushParser.terminateExecution; +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; + /** * Class for Nokogiri::HTML4::SAX::PushParser * @@ -134,7 +129,7 @@ public class Html4SaxPushParser extends RubyObject if (!options.recover && parserTask.getErrorCount() > errorCount0) { terminateTask(context.runtime); - throw parserTask.getLastError(); + throw parserTask.getLastError().toThrowable(); } return this; diff --git a/ext/java/nokogiri/XmlSaxParserContext.java b/ext/java/nokogiri/XmlSaxParserContext.java index 920b38e964..573c069740 100644 --- a/ext/java/nokogiri/XmlSaxParserContext.java +++ b/ext/java/nokogiri/XmlSaxParserContext.java @@ -1,33 +1,23 @@ package nokogiri; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.IOException; -import java.io.InputStream; - +import nokogiri.internals.*; import org.apache.xerces.parsers.AbstractSAXParser; import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyFixnum; -import org.jruby.RubyModule; -import org.jruby.RubyObjectAdapter; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.exceptions.RaiseException; -import org.jruby.javasupport.JavaEmbedUtils; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import org.xml.sax.ContentHandler; -import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; import org.xml.sax.SAXParseException; -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; -import nokogiri.internals.XmlSaxParser; +import java.io.IOException; +import java.io.InputStream; + +import static org.jruby.runtime.Helpers.invoke; /** * Base class for the SAX parsers. @@ -51,6 +41,7 @@ public class XmlSaxParserContext extends ParserContext protected AbstractSAXParser parser; protected NokogiriHandler handler; + protected NokogiriErrorHandler errorHandler; private boolean replaceEntities = true; private boolean recovery = false; @@ -168,31 +159,12 @@ public class XmlSaxParserContext extends ParserContext return (XmlSaxParserContext) NokogiriService.XML_SAXPARSER_CONTEXT_ALLOCATOR.allocate(runtime, klazz); } - /** - * Set a property of the underlying parser. - */ - protected void - setProperty(String key, Object val) - throws SAXNotRecognizedException, SAXNotSupportedException - { - parser.setProperty(key, val); - } - - protected void - setContentHandler(ContentHandler handler) - { - parser.setContentHandler(handler); - } - - protected void - setErrorHandler(ErrorHandler handler) - { - parser.setErrorHandler(handler); - } - public final NokogiriHandler getNokogiriHandler() { return handler; } + public final NokogiriErrorHandler + getNokogiriErrorHandler() { return errorHandler; } + /** * Perform any initialization prior to parsing with the handler * handlerRuby. Convenience hook for subclasses. @@ -223,6 +195,17 @@ public class XmlSaxParserContext extends ParserContext parser.parse(getInputSource()); } + protected static Options + defaultParseOptions(ThreadContext context) + { + return new ParserContext.Options( + RubyFixnum.fix2long(Helpers.invoke(context, + ((RubyClass)context.getRuntime().getClassFromPath("Nokogiri::XML::ParseOptions")) + .getConstant("DEFAULT_XML"), + "to_i")) + ); + } + @JRubyMethod public IRubyObject parse_with(ThreadContext context, IRubyObject handlerRuby) @@ -233,14 +216,19 @@ public class XmlSaxParserContext extends ParserContext throw runtime.newArgumentError("argument must respond_to document"); } - NokogiriHandler handler = this.handler = new NokogiriHandler(runtime, handlerRuby); - preParse(runtime, handlerRuby, handler); + /* TODO: how should we pass in parse options? */ + ParserContext.Options options = defaultParseOptions(context); + + errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); + handler = new NokogiriHandler(runtime, handlerRuby, errorHandler); - setContentHandler(handler); - setErrorHandler(handler); + preParse(runtime, handlerRuby, handler); + parser.setContentHandler(handler); + parser.setErrorHandler(handler); + parser.setEntityResolver(new NokogiriEntityResolver(runtime, errorHandler, options)); try { - setProperty("http://xml.org/sax/properties/lexical-handler", handler); + parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler); } catch (Exception ex) { throw runtime.newRuntimeError("Problem while creating XML SAX Parser: " + ex.toString()); } @@ -270,8 +258,6 @@ public class XmlSaxParserContext extends ParserContext postParse(runtime, handlerRuby, handler); - //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby); - return runtime.getNil(); } @@ -319,53 +305,6 @@ public class XmlSaxParserContext extends ParserContext return context.runtime.newBoolean(recovery); } - /** - * If the handler's document is a FragmentHandler, attempt to trim - * leading and trailing whitespace. - * - * This is a bit hackish and depends heavily on the internals of - * FragmentHandler. - */ - protected void - maybeTrimLeadingAndTrailingWhitespace(ThreadContext context, IRubyObject parser) - { - RubyObjectAdapter adapter = JavaEmbedUtils.newObjectAdapter(); - RubyModule mod = context.getRuntime().getClassFromPath("Nokogiri::XML::FragmentHandler"); - - IRubyObject handler = adapter.getInstanceVariable(parser, "@document"); - if (handler == null || handler.isNil() || !adapter.isKindOf(handler, mod)) { - return; - } - IRubyObject stack = adapter.getInstanceVariable(handler, "@stack"); - if (stack == null || stack.isNil()) { - return; - } - // doc is finally a DocumentFragment whose nodes we can check - IRubyObject doc = adapter.callMethod(stack, "first"); - if (doc == null || doc.isNil()) { - return; - } - - IRubyObject children; - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject first = adapter.callMethod(children, "first"); - if (NokogiriHelpers.isBlank(first)) { adapter.callMethod(first, "unlink"); } - else { break; } - } - - for (;;) { - children = adapter.callMethod(doc, "children"); - IRubyObject last = adapter.callMethod(children, "last"); - if (NokogiriHelpers.isBlank(last)) { adapter.callMethod(last, "unlink"); } - else { break; } - } - - // While we have a document, normalize it. - ((XmlNode) doc).normalize(); - } - @JRubyMethod(name = "column") public IRubyObject column(ThreadContext context) @@ -383,5 +322,4 @@ public class XmlSaxParserContext extends ParserContext if (number == null) { return context.getRuntime().getNil(); } return RubyFixnum.newFixnum(context.getRuntime(), number.longValue()); } - } diff --git a/ext/java/nokogiri/XmlSaxPushParser.java b/ext/java/nokogiri/XmlSaxPushParser.java index 019965df8c..81bbb0c5bc 100644 --- a/ext/java/nokogiri/XmlSaxPushParser.java +++ b/ext/java/nokogiri/XmlSaxPushParser.java @@ -1,20 +1,9 @@ package nokogiri; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static org.jruby.runtime.Helpers.invoke; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.FutureTask; -import java.util.concurrent.ThreadFactory; - +import nokogiri.internals.*; import org.jruby.Ruby; import org.jruby.RubyClass; +import org.jruby.RubyException; import org.jruby.RubyObject; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; @@ -22,11 +11,14 @@ import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; -import nokogiri.internals.ClosedStreamException; -import nokogiri.internals.NokogiriBlockingQueueInputStream; -import nokogiri.internals.NokogiriHandler; -import nokogiri.internals.NokogiriHelpers; -import nokogiri.internals.ParserContext; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +import java.util.concurrent.*; + +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static org.jruby.runtime.Helpers.invoke; /** * Class for Nokogiri::XML::SAX::PushParser @@ -159,7 +151,8 @@ public class XmlSaxPushParser extends RubyObject if (!options.recover && parserTask.getErrorCount() > errorCount0) { terminateTask(context.runtime); - throw ex = parserTask.getLastError(); + ex = parserTask.getLastError().toThrowable(); + throw ex; } return this; @@ -278,16 +271,15 @@ static class ParserTask extends ParserContext.ParserTask getErrorCount() { // check for null because thread may not have started yet - if (parser.getNokogiriHandler() == null) { return 0; } - return parser.getNokogiriHandler().getErrorCount(); + if (parser.getNokogiriErrorHandler() == null) { return 0; } + return parser.getNokogiriErrorHandler().getErrors().size(); } - synchronized final RaiseException + synchronized final RubyException getLastError() { - return parser.getNokogiriHandler().getLastError(); + List errors = parser.getNokogiriErrorHandler().getErrors(); + return errors.get(errors.size() - 1); } - } - } diff --git a/ext/java/nokogiri/internals/NokogiriEntityResolver.java b/ext/java/nokogiri/internals/NokogiriEntityResolver.java index 6ee5de87bd..eb9a8b5b3d 100644 --- a/ext/java/nokogiri/internals/NokogiriEntityResolver.java +++ b/ext/java/nokogiri/internals/NokogiriEntityResolver.java @@ -85,7 +85,7 @@ public class NokogiriEntityResolver implements EntityResolver2 private void addError(String errorMessage) { - if (handler != null) { handler.errors.add(new Exception(errorMessage)); } + if (handler != null) { handler.addError(new Exception(errorMessage)); } } /** diff --git a/ext/java/nokogiri/internals/NokogiriErrorHandler.java b/ext/java/nokogiri/internals/NokogiriErrorHandler.java index 51d8e05dae..9c4683ee48 100644 --- a/ext/java/nokogiri/internals/NokogiriErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriErrorHandler.java @@ -1,11 +1,15 @@ package nokogiri.internals; -import java.util.ArrayList; -import java.util.List; - +import nokogiri.XmlSyntaxError; import org.apache.xerces.xni.parser.XMLErrorHandler; +import org.jruby.Ruby; +import org.jruby.RubyException; +import org.jruby.exceptions.RaiseException; import org.xml.sax.ErrorHandler; +import java.util.ArrayList; +import java.util.List; + /** * Super class of error handlers. * @@ -17,23 +21,40 @@ */ public abstract class NokogiriErrorHandler implements ErrorHandler, XMLErrorHandler { - protected final List errors; + private final Ruby runtime; + protected final List errors; protected boolean noerror; protected boolean nowarning; public - NokogiriErrorHandler(boolean noerror, boolean nowarning) + NokogiriErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - this.errors = new ArrayList(4); + this.runtime = runtime; + this.errors = new ArrayList(4); this.noerror = noerror; this.nowarning = nowarning; } - List + public List getErrors() { return errors; } public void - addError(Exception ex) { errors.add(ex); } + addError(Exception ex) + { + addError(XmlSyntaxError.createXMLSyntaxError(runtime, ex)); + } + + public void + addError(RubyException ex) + { + errors.add(ex); + } + + public void + addError(RaiseException ex) + { + addError(ex.getException()); + } protected boolean usesNekoHtml(String domain) diff --git a/ext/java/nokogiri/internals/NokogiriHandler.java b/ext/java/nokogiri/internals/NokogiriHandler.java index 86a39c1204..fdc57f4f88 100644 --- a/ext/java/nokogiri/internals/NokogiriHandler.java +++ b/ext/java/nokogiri/internals/NokogiriHandler.java @@ -38,25 +38,19 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler private final Ruby runtime; private final RubyClass attrClass; private final IRubyObject object; - - /** - * Stores parse errors with the most-recent error last. - * - * TODO: should these be stored in the document 'errors' array? - * Currently only string messages are stored there. - */ - private final LinkedList errors = new LinkedList(); + private NokogiriErrorHandler errorHandler; private Locator locator; private boolean needEmptyAttrCheck; public - NokogiriHandler(Ruby runtime, IRubyObject object) + NokogiriHandler(Ruby runtime, IRubyObject object, NokogiriErrorHandler errorHandler) { assert object != null; this.runtime = runtime; this.attrClass = (RubyClass) runtime.getClassFromPath("Nokogiri::XML::SAX::Parser::Attribute"); this.object = object; + this.errorHandler = errorHandler; charactersBuilder = new StringBuilder(); String objectName = object.getMetaClass().getName(); if ("Nokogiri::HTML4::SAX::Parser".equals(objectName)) { needEmptyAttrCheck = true; } @@ -253,9 +247,9 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler try { final String msg = ex.getMessage(); call("error", runtime.newString(msg == null ? "" : msg)); - addError(XmlSyntaxError.createError(runtime, ex).toThrowable()); + errorHandler.addError(ex); } catch (RaiseException e) { - addError(e); + errorHandler.addError(e); throw e; } } @@ -282,22 +276,10 @@ public class NokogiriHandler extends DefaultHandler2 implements XmlDeclHandler call("warning", runtime.newString(msg == null ? "" : msg)); } - protected synchronized void - addError(RaiseException e) - { - errors.add(e); - } - public synchronized int getErrorCount() { - return errors.size(); - } - - public synchronized RaiseException - getLastError() - { - return errors.getLast(); + return errorHandler.getErrors().size(); } private void diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java index 1a9e5af490..bc907ddf95 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -13,21 +14,21 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler { public - NokogiriNonStrictErrorHandler(boolean noerror, boolean nowarning) + NokogiriNonStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void warning(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void error(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void @@ -38,7 +39,7 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler // found in the prolog, instead it will keep calling this method and we'll // keep inserting the error in the document errors array until we run // out of memory - errors.add(ex); + addError(ex); String message = ex.getMessage(); // The problem with Xerces is that some errors will cause the @@ -53,19 +54,19 @@ public class NokogiriNonStrictErrorHandler extends NokogiriErrorHandler public void error(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } public void warning(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /* diff --git a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java index 286820a423..152ee4657c 100644 --- a/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +++ b/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -20,15 +21,15 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler { public - NokogiriNonStrictErrorHandler4NekoHtml(boolean nowarning) + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean nowarning) { - super(false, nowarning); + super(runtime, false, nowarning); } public - NokogiriNonStrictErrorHandler4NekoHtml(boolean noerror, boolean nowarning) + NokogiriNonStrictErrorHandler4NekoHtml(Ruby runtime, boolean noerror, boolean nowarning) { - super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void @@ -40,13 +41,13 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void error(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } public void fatalError(SAXParseException ex) throws SAXException { - errors.add(ex); + addError(ex); } /** @@ -64,7 +65,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void error(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /** @@ -82,7 +83,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void fatalError(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } /** @@ -100,7 +101,7 @@ public class NokogiriNonStrictErrorHandler4NekoHtml extends NokogiriErrorHandler public void warning(String domain, String key, XMLParseException e) { - errors.add(e); + addError(e); } } diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java index e5566b5787..10cbc6f441 100644 --- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java @@ -1,6 +1,7 @@ package nokogiri.internals; import org.apache.xerces.xni.parser.XMLParseException; +import org.jruby.Ruby; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; @@ -14,23 +15,23 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler { public - NokogiriStrictErrorHandler(boolean noerror, boolean nowarning) + NokogiriStrictErrorHandler(Ruby runtime, boolean noerror, boolean nowarning) { - super(noerror, nowarning); + super(runtime, noerror, nowarning); } public void warning(SAXParseException spex) throws SAXException { if (!nowarning) { throw spex; } - else { errors.add(spex); } + else { addError(spex); } } public void error(SAXParseException spex) throws SAXException { if (!noerror) { throw spex; } - else { errors.add(spex); } + else { addError(spex); } } public void @@ -43,7 +44,7 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler error(String domain, String key, XMLParseException e) throws XMLParseException { if (!noerror) { throw e; } - else { errors.add(e); } + else { addError(e); } } public void @@ -56,6 +57,6 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler warning(String domain, String key, XMLParseException e) throws XMLParseException { if (!nowarning) { throw e; } - if (!usesNekoHtml(domain)) { errors.add(e); } + if (!usesNekoHtml(domain)) { addError(e); } } } diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index b94d1ae1f9..3621f1f98f 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -1,30 +1,24 @@ package nokogiri.internals; -import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; -import static nokogiri.internals.NokogiriHelpers.isBlank; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - +import nokogiri.XmlDocument; +import nokogiri.XmlDtd; +import nokogiri.XmlSyntaxError; import org.apache.xerces.parsers.DOMParser; -import org.jruby.Ruby; -import org.jruby.RubyArray; -import org.jruby.RubyClass; -import org.jruby.RubyFixnum; +import org.jruby.*; import org.jruby.exceptions.RaiseException; -import org.jruby.runtime.ThreadContext; import org.jruby.runtime.Helpers; +import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; -import nokogiri.NokogiriService; -import nokogiri.XmlDocument; -import nokogiri.XmlDtd; -import nokogiri.XmlSyntaxError; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static nokogiri.internals.NokogiriHelpers.isBlank; /** * Parser class for XML DOM processing. This class actually parses XML document @@ -48,7 +42,6 @@ public class XmlDomParserContext extends ParserContext protected static final String FEATURE_NOT_EXPAND_ENTITY = "http://apache.org/xml/features/dom/create-entity-ref-nodes"; protected static final String FEATURE_VALIDATION = "http://xml.org/sax/features/validation"; - private static final String XINCLUDE_FEATURE_ID = "http://apache.org/xml/features/xinclude"; private static final String SECURITY_MANAGER = "http://apache.org/xml/properties/security-manager"; protected ParserContext.Options options; @@ -69,17 +62,17 @@ public class XmlDomParserContext extends ParserContext this.options = new ParserContext.Options(RubyFixnum.fix2long(options)); java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding); ruby_encoding = encoding; - initErrorHandler(); + initErrorHandler(runtime); initParser(runtime); } protected void - initErrorHandler() + initErrorHandler(Ruby runtime) { if (options.recover) { - errorHandler = new NokogiriNonStrictErrorHandler(options.noError, options.noWarning); + errorHandler = new NokogiriNonStrictErrorHandler(runtime, options.noError, options.noWarning); } else { - errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning); + errorHandler = new NokogiriStrictErrorHandler(runtime, options.noError, options.noWarning); } } @@ -161,12 +154,10 @@ public class XmlDomParserContext extends ParserContext mapErrors(ThreadContext context, NokogiriErrorHandler errorHandler) { final Ruby runtime = context.runtime; - final List errors = errorHandler.getErrors(); + final List errors = errorHandler.getErrors(); final IRubyObject[] errorsAry = new IRubyObject[errors.size()]; for (int i = 0; i < errors.size(); i++) { - XmlSyntaxError xmlSyntaxError = XmlSyntaxError.createXMLSyntaxError(runtime); - xmlSyntaxError.setException(errors.get(i)); - errorsAry[i] = xmlSyntaxError; + errorsAry[i] = errors.get(i); } return runtime.newArrayNoCopy(errorsAry); } diff --git a/test/xml/sax/test_parser.rb b/test/xml/sax/test_parser.rb index de078d6115..1a54ff638b 100644 --- a/test/xml/sax/test_parser.rb +++ b/test/xml/sax/test_parser.rb @@ -2,425 +2,462 @@ require "helper" -module Nokogiri - module XML - module SAX - class TestParser < Nokogiri::SAX::TestCase - def setup - super - @parser = XML::SAX::Parser.new(Doc.new) - end - - def test_parser_context_yielded_io - doc = Doc.new - parser = XML::SAX::Parser.new(doc) - xml = "" - - block_called = false - parser.parse(StringIO.new(xml)) do |ctx| - block_called = true - ctx.replace_entities = true - end - - assert(block_called) - - assert_equal([["foo", [["a", "&b"]]]], doc.start_elements) - end - - def test_parser_context_yielded_in_memory - doc = Doc.new - parser = XML::SAX::Parser.new(doc) - xml = "" - - block_called = false - parser.parse(xml) do |ctx| - block_called = true - ctx.replace_entities = true - end - - assert(block_called) - - assert_equal([["foo", [["a", "&b"]]]], doc.start_elements) - end - - def test_empty_decl - parser = XML::SAX::Parser.new(Doc.new) - - xml = "" - parser.parse(xml) - assert(parser.document.start_document_called, xml) - assert_nil(parser.document.xmldecls, xml) - end - - def test_xml_decl - [ - ['', ["1.0"]], - ['', ["1.0", "UTF-8"]], - ['', ["1.0", "yes"]], - ['', ["1.0", "no"]], - ['', ["1.0", "UTF-8", "no"]], - ['', ["1.0", "ISO-8859-1", "yes"]], - ].each do |decl, value| - parser = XML::SAX::Parser.new(Doc.new) - - xml = "#{decl}\n" - parser.parse(xml) - assert(parser.document.start_document_called, xml) - assert_equal(value, parser.document.xmldecls, xml) - end - end - - def test_parse_empty - assert_raises(RuntimeError) do - @parser.parse("") - end - end - - def test_namespace_declaration_order_is_saved - @parser.parse(<<~eoxml) - - - - eoxml - assert_equal(2, @parser.document.start_elements_namespace.length) - el = @parser.document.start_elements_namespace.first - namespaces = el.last - assert_equal(["foo", "http://foo.example.com/"], namespaces.first) - assert_equal([nil, "http://example.com/"], namespaces.last) - end - - def test_bad_document_calls_error_handler - @parser.parse("") - assert(@parser.document.errors) - assert(@parser.document.errors.length > 0) - end - - def test_namespace_are_super_fun_to_parse - @parser.parse(<<~eoxml) - - - - - - hello world - - eoxml - - assert(@parser.document.start_elements_namespace.length > 0) - el = @parser.document.start_elements_namespace[1] - assert_equal("a", el.first) - assert_equal(1, el[1].length) - - attribute = el[1].first - assert_equal("bar", attribute.localname) - assert_equal("foo", attribute.prefix) - assert_equal("hello", attribute.value) - assert_equal("http://foo.example.com/", attribute.uri) - end - - def test_sax_v1_namespace_attribute_declarations - @parser.parse(<<~eoxml) - - - - - - hello world - - eoxml - assert(@parser.document.start_elements.length > 0) - elm = @parser.document.start_elements.first - assert_equal("root", elm.first) - assert(elm[1].include?(["xmlns:foo", "http://foo.example.com/"])) - assert(elm[1].include?(["xmlns", "http://example.com/"])) - end - - def test_sax_v1_namespace_nodes - @parser.parse(<<~eoxml) - - - - - - hello world - - eoxml - assert_equal(5, @parser.document.start_elements.length) - assert(@parser.document.start_elements.map(&:first).include?("foo:bar")) - assert(@parser.document.end_elements.map(&:first).include?("foo:bar")) - end - - def test_start_is_called_without_namespace - @parser.parse(<<~eoxml) - - - - eoxml - assert_equal(["root", "foo:f", "bar"], - @parser.document.start_elements.map(&:first)) - end - - def test_parser_sets_encoding - parser = XML::SAX::Parser.new(Doc.new, "UTF-8") - assert_equal("UTF-8", parser.encoding) - end - - def test_errors_set_after_parsing_bad_dom - doc = Nokogiri::XML("") - assert(doc.errors) - - @parser.parse("") - assert(@parser.document.errors) - assert(@parser.document.errors.length > 0) - - doc.errors.each do |error| - assert_equal("UTF-8", error.message.encoding.name) - end - - # when using JRuby Nokogiri, more errors will be generated as the DOM - # parser continue to parse an ill formed document, while the sax parser - # will stop at the first error - unless Nokogiri.jruby? - assert_equal(doc.errors.length, @parser.document.errors.length) - end - end - - def test_parse_with_memory_argument - @parser.parse(File.read(XML_FILE)) - assert(@parser.document.cdata_blocks.length > 0) - end - - def test_parse_with_io_argument - File.open(XML_FILE, "rb") do |f| - @parser.parse(f) - end - assert(@parser.document.cdata_blocks.length > 0) - end - - def test_parse_io - call_parse_io_with_encoding("UTF-8") - end - - # issue #828 - def test_parse_io_lower_case_encoding - call_parse_io_with_encoding("utf-8") - end - - def call_parse_io_with_encoding(encoding) - File.open(XML_FILE, "rb") do |f| - @parser.parse_io(f, encoding) - end - assert(@parser.document.cdata_blocks.length > 0) - - called = false - @parser.document.start_elements.flatten.each do |thing| - assert_equal("UTF-8", thing.encoding.name) - called = true - end - assert(called) - - called = false - @parser.document.end_elements.flatten.each do |thing| - assert_equal("UTF-8", thing.encoding.name) - called = true - end - assert(called) - - called = false - @parser.document.data.each do |thing| - assert_equal("UTF-8", thing.encoding.name) - called = true - end - assert(called) - - called = false - @parser.document.comments.flatten.each do |thing| - assert_equal("UTF-8", thing.encoding.name) - called = true - end - assert(called) - - called = false - @parser.document.cdata_blocks.flatten.each do |thing| - assert_equal("UTF-8", thing.encoding.name) - called = true - end - assert(called) - end - - def test_parse_file - @parser.parse_file(XML_FILE) - - assert_raises(ArgumentError) do - @parser.parse_file(nil) - end - - assert_raises(Errno::ENOENT) do - @parser.parse_file("") - end - assert_raises(Errno::EISDIR) do - @parser.parse_file(File.expand_path(File.dirname(__FILE__))) - end - end - - def test_render_parse_nil_param - assert_raises(ArgumentError) { @parser.parse_memory(nil) } - end - - def test_bad_encoding_args - assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, "not an encoding") } - assert_raises(ArgumentError) { @parser.parse_io(StringIO.new(""), "not an encoding") } - end - - def test_ctag - @parser.parse_memory(<<~eoxml) -

- - Paragraph 1 -

- eoxml - assert_equal([" This is a comment "], @parser.document.cdata_blocks) - end - - def test_comment - @parser.parse_memory(<<~eoxml) -

- - Paragraph 1 -

- eoxml - assert_equal([" This is a comment "], @parser.document.comments) - end - - def test_characters - @parser.parse_memory(<<~eoxml) -

Paragraph 1

- eoxml - assert_equal(["Paragraph 1"], @parser.document.data) - end - - def test_end_document - @parser.parse_memory(<<~eoxml) -

Paragraph 1

- eoxml - assert(@parser.document.end_document_called) - end - - def test_end_element - @parser.parse_memory(<<~eoxml) -

Paragraph 1

- eoxml - assert_equal([["p"]], @parser.document.end_elements) - end - - def test_start_element_attrs - @parser.parse_memory(<<~eoxml) -

Paragraph 1

- eoxml - assert_equal([["p", [["id", "asdfasdf"]]]], @parser.document.start_elements) - end - - def test_start_element_attrs_include_namespaces - @parser.parse_memory(<<~eoxml) -

Paragraph 1

- eoxml - assert_equal([["p", [["xmlns:foo", "http://foo.example.com/"]]]], - @parser.document.start_elements) - end - - def test_processing_instruction - @parser.parse_memory(<<~eoxml) - - - eoxml - assert_equal([["xml-stylesheet", 'href="a.xsl" type="text/xsl"']], - @parser.document.processing_instructions) - end - - def test_parse_document - skip_unless_libxml2("JRuby SAXParser only parses well-formed XML documents") - @parser.parse_memory(<<~eoxml) -

Paragraph 1

-

Paragraph 2

- eoxml - end - - def test_parser_attributes - xml = <<~eoxml - - eoxml - - block_called = false - @parser.parse(xml) do |ctx| - block_called = true - ctx.replace_entities = true - end - - assert(block_called) - - assert_equal([["root", []], ["foo", [["a", "&b"], ["c", ">d"]]]], @parser.document.start_elements) - end - - def test_recovery_from_incorrect_xml - xml = <<~eoxml - heyhey yourself - eoxml - - block_called = false - @parser.parse(xml) do |ctx| - block_called = true - ctx.recovery = true - end - - assert(block_called) - - assert_equal([["Root", []], ["Data", []], ["Item", []], ["Data", []], ["Item", []]], - @parser.document.start_elements) - end - - def test_square_bracket_in_text # issue 1261 - xml = <<~eoxml - - en:#:home_page:#:stories:#:[6]:#:name - Sandy S. - - eoxml - @parser.parse(xml) - assert_includes(@parser.document.data, "en:#:home_page:#:stories:#:[6]:#:name") - end - - def test_large_cdata_is_handled - # see #2132 and https://gitlab.gnome.org/GNOME/libxml2/-/issues/200 - skip("Upstream libxml2 <= 2.9.10 needs to be patched") if Nokogiri::VersionInfo.instance.libxml2_using_system? - - template = <<~EOF - - - - - gorilla - secret - - - - - - - - - EOF - - factor = 10 - huge_data = "a" * (1024 * 1024 * factor) - xml = StringIO.new(template % (huge_data)) - - handler = Nokogiri::SAX::TestCase::Doc.new - parser = Nokogiri::XML::SAX::Parser.new(handler) - parser.parse(xml) - - assert_predicate(handler.errors, :empty?) - end +class Nokogiri::SAX::TestCase + describe Nokogiri::XML::SAX::Parser do + let(:parser) { Nokogiri::XML::SAX::Parser.new(Doc.new) } + + it :test_parser_context_yielded_io do + doc = Doc.new + parser = Nokogiri::XML::SAX::Parser.new(doc) + xml = "" + + block_called = false + parser.parse(StringIO.new(xml)) do |ctx| + block_called = true + ctx.replace_entities = true end + + assert(block_called) + + assert_equal([["foo", [["a", "&b"]]]], doc.start_elements) + end + + it :test_parser_context_yielded_in_memory do + doc = Doc.new + parser = Nokogiri::XML::SAX::Parser.new(doc) + xml = "" + + block_called = false + parser.parse(xml) do |ctx| + block_called = true + ctx.replace_entities = true + end + + assert(block_called) + + assert_equal([["foo", [["a", "&b"]]]], doc.start_elements) + end + + it :test_empty_decl do + parser = Nokogiri::XML::SAX::Parser.new(Doc.new) + + xml = "" + parser.parse(xml) + assert(parser.document.start_document_called, xml) + assert_nil(parser.document.xmldecls, xml) + end + + it :test_xml_decl do + [ + ['', ["1.0"]], + ['', ["1.0", "UTF-8"]], + ['', ["1.0", "yes"]], + ['', ["1.0", "no"]], + ['', ["1.0", "UTF-8", "no"]], + ['', ["1.0", "ISO-8859-1", "yes"]], + ].each do |decl, value| + parser = Nokogiri::XML::SAX::Parser.new(Doc.new) + + xml = "#{decl}\n" + parser.parse(xml) + assert(parser.document.start_document_called, xml) + assert_equal(value, parser.document.xmldecls, xml) + end + end + + it :test_parse_empty do + assert_raises(RuntimeError) do + parser.parse("") + end + end + + it :test_namespace_declaration_order_is_saved do + parser.parse(<<~EOF) + +
+ + EOF + assert_equal(2, parser.document.start_elements_namespace.length) + el = parser.document.start_elements_namespace.first + namespaces = el.last + assert_equal(["foo", "http://foo.example.com/"], namespaces.first) + assert_equal([nil, "http://example.com/"], namespaces.last) + end + + it :test_bad_document_calls_error_handler do + parser.parse("") + assert(parser.document.errors) + assert(parser.document.errors.length > 0) + end + + it :test_namespace_are_super_fun_to_parse do + parser.parse(<<~EOF) + + + + + + hello world + + EOF + + assert(parser.document.start_elements_namespace.length > 0) + el = parser.document.start_elements_namespace[1] + assert_equal("a", el.first) + assert_equal(1, el[1].length) + + attribute = el[1].first + assert_equal("bar", attribute.localname) + assert_equal("foo", attribute.prefix) + assert_equal("hello", attribute.value) + assert_equal("http://foo.example.com/", attribute.uri) + end + + it :test_sax_v1_namespace_attribute_declarations do + parser.parse(<<~EOF) + + + + + + hello world + + EOF + assert(parser.document.start_elements.length > 0) + elm = parser.document.start_elements.first + assert_equal("root", elm.first) + assert(elm[1].include?(["xmlns:foo", "http://foo.example.com/"])) + assert(elm[1].include?(["xmlns", "http://example.com/"])) + end + + it :test_sax_v1_namespace_nodes do + parser.parse(<<~EOF) + + + + + + hello world + + EOF + assert_equal(5, parser.document.start_elements.length) + assert(parser.document.start_elements.map(&:first).include?("foo:bar")) + assert(parser.document.end_elements.map(&:first).include?("foo:bar")) + end + + it :test_start_is_called_without_namespace do + parser.parse(<<~EOF) + + + + EOF + assert_equal( + ["root", "foo:f", "bar"], + parser.document.start_elements.map(&:first) + ) + end + + it :test_parser_sets_encoding do + parser = Nokogiri::XML::SAX::Parser.new(Doc.new, "UTF-8") + assert_equal("UTF-8", parser.encoding) + end + + it :test_errors_set_after_parsing_bad_dom do + doc = Nokogiri::XML("") + assert(doc.errors) + + parser.parse("") + assert(parser.document.errors) + assert(parser.document.errors.length > 0) + + doc.errors.each do |error| + assert_equal("UTF-8", error.message.encoding.name) + end + + # when using JRuby Nokogiri, more errors will be generated as the DOM + # parser continue to parse an ill formed document, while the sax parser + # will stop at the first error + unless Nokogiri.jruby? + assert_equal(doc.errors.length, parser.document.errors.length) + end + end + + it :test_parse_with_memory_argument do + parser.parse(File.read(XML_FILE)) + assert(parser.document.cdata_blocks.length > 0) + end + + it :test_parse_with_io_argument do + File.open(XML_FILE, "rb") do |f| + parser.parse(f) + end + assert(parser.document.cdata_blocks.length > 0) + end + + it :test_parse_io do + call_parse_io_with_encoding("UTF-8") + end + + # issue #828 + it :test_parse_io_lower_case_encoding do + call_parse_io_with_encoding("utf-8") + end + + def call_parse_io_with_encoding(encoding) + File.open(XML_FILE, "rb") do |f| + parser.parse_io(f, encoding) + end + assert(parser.document.cdata_blocks.length > 0) + + called = false + parser.document.start_elements.flatten.each do |thing| + assert_equal("UTF-8", thing.encoding.name) + called = true + end + assert(called) + + called = false + parser.document.end_elements.flatten.each do |thing| + assert_equal("UTF-8", thing.encoding.name) + called = true + end + assert(called) + + called = false + parser.document.data.each do |thing| + assert_equal("UTF-8", thing.encoding.name) + called = true + end + assert(called) + + called = false + parser.document.comments.flatten.each do |thing| + assert_equal("UTF-8", thing.encoding.name) + called = true + end + assert(called) + + called = false + parser.document.cdata_blocks.flatten.each do |thing| + assert_equal("UTF-8", thing.encoding.name) + called = true + end + assert(called) + end + + it :test_parse_file do + parser.parse_file(XML_FILE) + + assert_raises(ArgumentError) do + parser.parse_file(nil) + end + + assert_raises(Errno::ENOENT) do + parser.parse_file("") + end + assert_raises(Errno::EISDIR) do + parser.parse_file(File.expand_path(File.dirname(__FILE__))) + end + end + + it :test_render_parse_nil_param do + assert_raises(ArgumentError) { parser.parse_memory(nil) } + end + + it :test_bad_encoding_args do + assert_raises(ArgumentError) { Nokogiri::XML::SAX::Parser.new(Doc.new, "not an encoding") } + assert_raises(ArgumentError) { parser.parse_io(StringIO.new(""), "not an encoding") } + end + + it :test_ctag do + parser.parse_memory(<<~EOF) +

+ + Paragraph 1 +

+ EOF + assert_equal([" This is a comment "], parser.document.cdata_blocks) + end + + it :test_comment do + parser.parse_memory(<<~EOF) +

+ + Paragraph 1 +

+ EOF + assert_equal([" This is a comment "], parser.document.comments) + end + + it :test_characters do + parser.parse_memory(<<~EOF) +

Paragraph 1

+ EOF + assert_equal(["Paragraph 1"], parser.document.data) + end + + it :test_end_document do + parser.parse_memory(<<~EOF) +

Paragraph 1

+ EOF + assert(parser.document.end_document_called) + end + + it :test_end_element do + parser.parse_memory(<<~EOF) +

Paragraph 1

+ EOF + assert_equal([["p"]], parser.document.end_elements) + end + + it :test_start_element_attrs do + parser.parse_memory(<<~EOF) +

Paragraph 1

+ EOF + assert_equal([["p", [["id", "asdfasdf"]]]], parser.document.start_elements) + end + + it :test_start_element_attrs_include_namespaces do + parser.parse_memory(<<~EOF) +

Paragraph 1

+ EOF + assert_equal( + [["p", [["xmlns:foo", "http://foo.example.com/"]]]], + parser.document.start_elements + ) + end + + it :test_processing_instruction do + parser.parse_memory(<<~EOF) + + + EOF + assert_equal( + [["xml-stylesheet", 'href="a.xsl" type="text/xsl"']], + parser.document.processing_instructions + ) + end + + it :test_parse_document do + skip_unless_libxml2("JRuby SAXParser only parses well-formed XML documents") + parser.parse_memory(<<~EOF) +

Paragraph 1

+

Paragraph 2

+ EOF + end + + it :test_parser_attributes do + xml = <<~EOF + + EOF + + block_called = false + parser.parse(xml) do |ctx| + block_called = true + ctx.replace_entities = true + end + + assert(block_called) + + assert_equal( + [["root", []], ["foo", [["a", "&b"], ["c", ">d"]]]], parser.document.start_elements + ) + end + + it :test_recovery_from_incorrect_xml do + xml = <<~EOF + heyhey yourself + EOF + + block_called = false + parser.parse(xml) do |ctx| + block_called = true + ctx.recovery = true + end + + assert(block_called) + + assert_equal( + [["Root", []], ["Data", []], ["Item", []], ["Data", []], ["Item", []]], + parser.document.start_elements + ) + end + + it :test_square_bracket_in_text do + # issue 1261 + xml = <<~EOF + + en:#:home_page:#:stories:#:[6]:#:name + Sandy S. + + EOF + parser.parse(xml) + assert_includes(parser.document.data, "en:#:home_page:#:stories:#:[6]:#:name") + end + + it :test_large_cdata_is_handled do + # see #2132 and https://gitlab.gnome.org/GNOME/libxml2/-/issues/200 + skip("Upstream libxml2 <= 2.9.10 needs to be patched") if Nokogiri::VersionInfo.instance.libxml2_using_system? + + template = <<~EOF + + + + + gorilla + secret + + + + + + + + + EOF + + factor = 10 + huge_data = "a" * (1024 * 1024 * factor) + xml = StringIO.new(template % huge_data) + + handler = Nokogiri::SAX::TestCase::Doc.new + parser = Nokogiri::XML::SAX::Parser.new(handler) + parser.parse(xml) + + assert_predicate(handler.errors, :empty?) + end + + it "does not resolve entities by default" do + xml = <<~EOF + + + + ]> + &local;&custom; + EOF + + doc = Doc.new + parser = Nokogiri::XML::SAX::Parser.new(doc) + parser.parse(xml) + + assert_nil(doc.data) + end + + it "does not resolve network external entities by default" do + xml = <<~EOF + + + ]> + &remote; + EOF + + doc = Doc.new + parser = Nokogiri::XML::SAX::Parser.new(doc) + parser.parse(xml) + + assert_nil(doc.data) end end end diff --git a/test/xml/sax/test_push_parser.rb b/test/xml/sax/test_push_parser.rb index 587bd54a1a..dfab1793fa 100644 --- a/test/xml/sax/test_push_parser.rb +++ b/test/xml/sax/test_push_parser.rb @@ -1,266 +1,243 @@ # -*- coding: utf-8 -*- +# frozen_string_literal: true require "helper" -module Nokogiri - module XML - module SAX - class TestPushParser < Nokogiri::SAX::TestCase - def setup - super - @parser = XML::SAX::PushParser.new(Doc.new) - end - - def test_exception - assert_raises(SyntaxError) do - @parser << "" - end - - assert_raises(SyntaxError) do - @parser << nil - end - end +class Nokogiri::SAX::TestCase + describe Nokogiri::XML::SAX::PushParser do + let(:parser) { Nokogiri::XML::SAX::PushParser.new(Doc.new) } - def test_early_finish - @parser << "" - assert_raises(SyntaxError) do - @parser.finish - end - end + it :test_exception do + assert_raises(Nokogiri::XML::SyntaxError) do + parser << "" + end - def test_write_last_chunk - @parser << "" - @parser.write "", true - assert_equal [["foo", []]], @parser.document.start_elements - assert_equal [["foo"]], @parser.document.end_elements - end + assert_raises(Nokogiri::XML::SyntaxError) do + parser << nil + end + end - def test_empty_doc - @parser.options |= XML::ParseOptions::RECOVER - @parser.write "", true - assert_nil @parser.document.start_elements - assert_nil @parser.document.end_elements - end + it :test_early_finish do + parser << "" + assert_raises(Nokogiri::XML::SyntaxError) do + parser.finish + end + end + it :test_write_last_chunk do + parser << "" + parser.write("", true) + assert_equal [["foo", []]], parser.document.start_elements + assert_equal [["foo"]], parser.document.end_elements + end - def test_finish_should_rethrow_last_error - begin - @parser << "" - rescue => e - expected = e - end + it :test_empty_doc do + parser.options |= Nokogiri::XML::ParseOptions::RECOVER + parser.write("", true) + assert_nil parser.document.start_elements + assert_nil parser.document.end_elements + end - begin - @parser.finish - rescue => e - actual = e - end + it :test_finish_should_rethrow_last_error do + expected = assert_raise(Nokogiri::XML::SyntaxError) { parser << "" } + actual = assert_raise(Nokogiri::XML::SyntaxError) { parser.finish } + assert_equal actual.message, expected.message + end - assert_equal actual.message, expected.message + it :test_should_throw_error_returned_by_document do + doc = Doc.new + class << doc + def error(msg) + raise "parse error" end + end + parser = Nokogiri::XML::SAX::PushParser.new(doc) - def test_should_throw_error_returned_by_document - doc = Doc.new - class << doc - def error msg - raise "parse error" - end - end - - @parser = XML::SAX::PushParser.new(doc) - begin - @parser << "" - rescue => e - actual = e - end - fail 'PushParser should throw error when fed ill-formed data' if actual.nil? - - assert_equal actual.message, "parse error" - end + exception = assert_raise(RuntimeError) { parser << "" } + assert_equal exception.message, "parse error" + end - def test_writing_nil - assert_equal @parser.write(nil), @parser - end + it :test_writing_nil do + assert_equal parser.write(nil), parser + end - def test_end_document_called - @parser.<<(<<-eoxml) -

- - Paragraph 1 -

- eoxml - assert ! @parser.document.end_document_called - @parser.finish - assert @parser.document.end_document_called - end + it :test_end_document_called do + parser.<<(<<~EOF) +

+ + Paragraph 1 +

+ EOF + assert !parser.document.end_document_called + parser.finish + assert parser.document.end_document_called + end - def test_start_element - @parser.<<(<<-eoxml) -

- eoxml - - assert_equal [["p", [["id", "asdfasdf"]]]], - @parser.document.start_elements - - @parser.<<(<<-eoxml) - - Paragraph 1 -

- eoxml - assert_equal [' This is a comment '], @parser.document.comments - @parser.finish - end + it :test_start_element do + parser.<<(<<~EOF) +

+ EOF + + assert_equal [["p", [["id", "asdfasdf"]]]], + parser.document.start_elements + + parser.<<(<<~EOF) + + Paragraph 1 +

+ EOF + assert_equal [" This is a comment "], parser.document.comments + parser.finish + end - def test_start_element_with_namespaces - @parser.<<(<<-eoxml) -

- eoxml - - assert_equal [["p", [["xmlns:foo", "http://foo.example.com/"]]]], - @parser.document.start_elements - - @parser.<<(<<-eoxml) - - Paragraph 1 -

- eoxml - assert_equal [' This is a comment '], @parser.document.comments - @parser.finish - end + it :test_start_element_with_namespaces do + parser.<<(<<~EOF) +

+ EOF + + assert_equal [["p", [["xmlns:foo", "http://foo.example.com/"]]]], + parser.document.start_elements + + parser.<<(<<~EOF) + + Paragraph 1 +

+ EOF + assert_equal [" This is a comment "], parser.document.comments + parser.finish + end - def test_start_element_ns - @parser.<<(<<-eoxml) - - eoxml + it :test_start_element_ns do + parser.<<(<<~EOF) + + EOF - assert_equal 1, @parser.document.start_elements_namespace.length - el = @parser.document.start_elements_namespace.first + assert_equal 1, parser.document.start_elements_namespace.length + el = parser.document.start_elements_namespace.first - assert_equal 'stream', el.first - assert_equal 2, el[1].length - assert_equal [['version', '1.0'], ['size', 'large']], - el[1].map { |x| [x.localname, x.value] } + assert_equal "stream", el.first + assert_equal 2, el[1].length + assert_equal [["version", "1.0"], ["size", "large"]], + el[1].map { |x| [x.localname, x.value] } - assert_equal 'stream', el[2] - assert_equal 'http://etherx.jabber.org/streams', el[3] - @parser.finish - end + assert_equal "stream", el[2] + assert_equal "http://etherx.jabber.org/streams", el[3] + parser.finish + end - def test_end_element_ns - @parser.<<(<<-eoxml) - - eoxml + it :test_end_element_ns do + parser.<<(<<~EOF) + + EOF - assert_equal [['stream', 'stream', 'http://etherx.jabber.org/streams']], - @parser.document.end_elements_namespace - @parser.finish - end + assert_equal [["stream", "stream", "http://etherx.jabber.org/streams"]], + parser.document.end_elements_namespace + parser.finish + end - def test_chevron_partial_xml - @parser.<<(<<-eoxml) -

- eoxml - - @parser.<<(<<-eoxml) - - Paragraph 1 -

- eoxml - assert_equal [' This is a comment '], @parser.document.comments - @parser.finish - end + it :test_chevron_partial_xml do + parser.<<(<<~EOF) +

+ EOF + + parser.<<(<<~EOF) + + Paragraph 1 +

+ EOF + assert_equal [" This is a comment "], parser.document.comments + parser.finish + end - def test_chevron - @parser.<<(<<-eoxml) -

- - Paragraph 1 -

- eoxml - @parser.finish - assert_equal [' This is a comment '], @parser.document.comments - end + it :test_chevron do + parser.<<(<<~EOF) +

+ + Paragraph 1 +

+ EOF + parser.finish + assert_equal [" This is a comment "], parser.document.comments + end - def test_default_options - assert_equal 0, @parser.options - end + it :test_default_options do + assert_equal 0, parser.options + end - def test_recover - @parser.options |= XML::ParseOptions::RECOVER - @parser.<<(<<-eoxml) -

- Foo - - Bar -

- eoxml - @parser.finish - assert(@parser.document.errors.size >= 1) - assert_equal [["p", []], ["bar", []]], @parser.document.start_elements - assert_equal "FooBar", @parser.document.data.map { |x| - x.gsub(/\s/, '') - }.join - end + it :test_recover do + parser.options |= Nokogiri::XML::ParseOptions::RECOVER + parser.<<(<<~EOF) +

+ Foo + + Bar +

+ EOF + parser.finish + assert(parser.document.errors.size >= 1) + assert_equal [["p", []], ["bar", []]], parser.document.start_elements + assert_equal "FooBar", parser.document.data.map { |x| + x.gsub(/\s/, "") + }.join + end - def test_broken_encoding - skip_unless_libxml2("ultra hard to fix for pure Java version") - @parser.options |= XML::ParseOptions::RECOVER - # This is ISO_8859-1: - @parser.<< "Gau\337" - @parser.finish - assert(@parser.document.errors.size >= 1) - assert_equal "Gau\337", @parser.document.data.join - assert_equal [["r"]], @parser.document.end_elements - end + it :test_broken_encoding do + skip_unless_libxml2("ultra hard to fix for pure Java version") + parser.options |= Nokogiri::XML::ParseOptions::RECOVER + # This is ISO_8859-1: + parser.<< "Gau\337" + parser.finish + assert(parser.document.errors.size >= 1) + assert_equal "Gau\337", parser.document.data.join + assert_equal [["r"]], parser.document.end_elements + end - def test_replace_entities_attribute_behavior - if Nokogiri.uses_libxml? - # initially false - assert_equal false, @parser.replace_entities - - # can be set to true - @parser.replace_entities = true - assert_equal true, @parser.replace_entities - - # can be set to false - @parser.replace_entities = false - assert_equal false, @parser.replace_entities - else - # initially true - assert_equal true, @parser.replace_entities - - # ignore attempts to set to false - @parser.replace_entities = false # TODO: should we raise an exception here? - assert_equal true, @parser.replace_entities - end - end + it :test_replace_entities_attribute_behavior do + if Nokogiri.uses_libxml? + # initially false + assert_equal false, parser.replace_entities + + # can be set to true + parser.replace_entities = true + assert_equal true, parser.replace_entities + + # can be set to false + parser.replace_entities = false + assert_equal false, parser.replace_entities + else + # initially true + assert_equal true, parser.replace_entities + + # ignore attempts to set to false + parser.replace_entities = false # TODO: should we raise an exception here? + assert_equal true, parser.replace_entities + end + end - def test_untouched_entities - skip_unless_libxml2("entities are always replaced in pure Java version") - @parser.<<(<<-eoxml) -

- - Paragraph 1 & 2 -

- eoxml - @parser.finish - assert_equal [["p", [["id", "asdf&asdf"]]]], @parser.document.start_elements - assert_equal "Paragraph 1 & 2", @parser.document.data.join.strip - end + it :test_untouched_entities do + skip_unless_libxml2("entities are always replaced in pure Java version") + parser.<<(<<~EOF) +

+ + Paragraph 1 & 2 +

+ EOF + parser.finish + assert_equal [["p", [["id", "asdf&asdf"]]]], parser.document.start_elements + assert_equal "Paragraph 1 & 2", parser.document.data.join.strip + end - def test_replaced_entities - @parser.replace_entities = true - @parser.<<(<<-eoxml) -

- - Paragraph 1 & 2 -

- eoxml - @parser.finish - assert_equal [["p", [["id", "asdf&asdf"]]]], @parser.document.start_elements - assert_equal "Paragraph 1 & 2", @parser.document.data.join.strip - end - end + it :test_replaced_entities do + parser.replace_entities = true + parser.<<(<<~EOF) +

+ + Paragraph 1 & 2 +

+ EOF + parser.finish + assert_equal [["p", [["id", "asdf&asdf"]]]], parser.document.start_elements + assert_equal "Paragraph 1 & 2", parser.document.data.join.strip end end end