From 193d50650a18ebeec3a3dd932f5dd140b57e6976 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 23 Aug 2021 23:12:40 -0400 Subject: [PATCH] doc: improve documentation for XML::{Schema,RelaxNG} --- ext/nokogiri/xml_relax_ng.c | 45 ++++++++------ ext/nokogiri/xml_schema.c | 60 +++++++++---------- lib/nokogiri/xml/relax_ng.rb | 52 +++++++++------- lib/nokogiri/xml/schema.rb | 112 ++++++++++++++++++++--------------- 4 files changed, 152 insertions(+), 117 deletions(-) diff --git a/ext/nokogiri/xml_relax_ng.c b/ext/nokogiri/xml_relax_ng.c index e55f54e4e7..1919b81586 100644 --- a/ext/nokogiri/xml_relax_ng.c +++ b/ext/nokogiri/xml_relax_ng.c @@ -10,14 +10,9 @@ dealloc(xmlRelaxNGPtr schema) NOKOGIRI_DEBUG_END(schema); } -/* - * call-seq: - * validate_document(document) - * - * Validate a Nokogiri::XML::Document against this RelaxNG schema. - */ + static VALUE -validate_document(VALUE self, VALUE document) +rb_xml_relax_ng_validate_document(VALUE self, VALUE document) { xmlDocPtr doc; xmlRelaxNGPtr schema; @@ -51,14 +46,21 @@ validate_document(VALUE self, VALUE document) return errors; } + /* - * call-seq: - * read_memory(string) + * @overload read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) * - * Create a new RelaxNG from the contents of +string+ + * Parse a RELAX NG schema definition and create a new Schema object. + * + * Note that the limitation of this method relative to {RelaxNG.new} is that +input+ must be type + * String, whereas {RelaxNG.new} also supports IO types. + * + * @param input [String] RELAX NG schema definition + * @param parse_options [Nokogiri::XML::ParseOptions] + * @return [Nokogiri::XML::RelaxNG] */ static VALUE -read_memory(int argc, VALUE *argv, VALUE klass) +rb_xml_relax_ng_s_read_memory(int argc, VALUE *argv, VALUE klass) { VALUE content; VALUE parse_options; @@ -109,14 +111,21 @@ read_memory(int argc, VALUE *argv, VALUE klass) return rb_schema; } + /* - * call-seq: - * from_document(doc) + * @overload from_document(document, parse_options = ParseOptions::DEFAULT_SCHEMA) + * + * Create a Schema from an already-parsed RELAX NG schema definition document. + * + * @param document [XML::Document] A {XML::Document} object representing the parsed RELAX NG + * @param parse_options [Nokogiri::XML::ParseOptions] + * @return [Nokogiri::XML::RelaxNG] * - * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+ + * @note +parse_options+ is currently unused by this method and is present only as a placeholder + * for future functionality. */ static VALUE -from_document(int argc, VALUE *argv, VALUE klass) +rb_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE klass) { VALUE document; VALUE parse_options; @@ -178,8 +187,8 @@ noko_init_xml_relax_ng() assert(cNokogiriXmlSchema); cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1); - rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1); + rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", rb_xml_relax_ng_s_read_memory, -1); + rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", rb_xml_relax_ng_s_from_document, -1); - rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1); + rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", rb_xml_relax_ng_validate_document, 1); } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index a8175e6da3..8503598ba6 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -10,14 +10,8 @@ dealloc(xmlSchemaPtr schema) NOKOGIRI_DEBUG_END(schema); } -/* - * call-seq: - * validate_document(document) - * - * Validate a Nokogiri::XML::Document against this Schema. - */ static VALUE -validate_document(VALUE self, VALUE document) +rb_xml_schema_validate_document(VALUE self, VALUE document) { xmlDocPtr doc; xmlSchemaPtr schema; @@ -51,14 +45,8 @@ validate_document(VALUE self, VALUE document) return errors; } -/* - * call-seq: - * validate_file(filename) - * - * Validate a file against this Schema. - */ static VALUE -validate_file(VALUE self, VALUE rb_filename) +rb_xml_schema_validate_file(VALUE self, VALUE rb_filename) { xmlSchemaPtr schema; xmlSchemaValidCtxtPtr valid_ctxt; @@ -93,13 +81,19 @@ validate_file(VALUE self, VALUE rb_filename) } /* - * call-seq: - * read_memory(string) + * @overload read_memory(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + * + * Parse an XSD schema definition and create a new Schema object. + * + * Note that the limitation of this method relative to {Schema.new} is that +input+ must be type + * String, whereas {Schema.new} also supports IO types. * - * Create a new Schema from the contents of +string+ + * @param input [String] XSD schema definition + * @param parse_options [Nokogiri::XML::ParseOptions] + * @return [Nokogiri::XML::Schema] */ static VALUE -read_memory(int argc, VALUE *argv, VALUE klass) +rb_xml_schema_s_read_memory(int argc, VALUE *argv, VALUE klass) { VALUE content; VALUE parse_options; @@ -162,6 +156,7 @@ read_memory(int argc, VALUE *argv, VALUE klass) return rb_schema; } + /* Schema creation will remove and deallocate "blank" nodes. * If those blank nodes have been exposed to Ruby, they could get freed * out from under the VALUE pointer. This function checks to see if any of @@ -188,14 +183,18 @@ has_blank_nodes_p(VALUE cache) return 0; } + /* - * call-seq: - * from_document(doc) + * @overload from_document(document, parse_options = ParseOptions::DEFAULT_SCHEMA) * - * Create a new Schema from the Nokogiri::XML::Document +doc+ + * Create a Schema from an already-parsed XSD schema definition document. + * + * @param document [XML::Document] A {XML::Document} object representing the parsed XSD + * @param parse_options [Nokogiri::XML::ParseOptions] + * @return [Nokogiri::XML::Schema] */ static VALUE -from_document(int argc, VALUE *argv, VALUE klass) +rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass) { VALUE document; VALUE parse_options; @@ -206,7 +205,7 @@ from_document(int argc, VALUE *argv, VALUE klass) VALUE errors; VALUE rb_schema; int scanned_args = 0; - xmlExternalEntityLoader old_loader = 0; + xmlExternalEntityLoader saved_loader = 0; scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); @@ -236,14 +235,14 @@ from_document(int argc, VALUE *argv, VALUE klass) #endif if (parse_options_int & XML_PARSE_NONET) { - old_loader = xmlGetExternalEntityLoader(); + saved_loader = xmlGetExternalEntityLoader(); xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader); } schema = xmlSchemaParse(ctx); - if (old_loader) { - xmlSetExternalEntityLoader(old_loader); + if (saved_loader) { + xmlSetExternalEntityLoader(saved_loader); } xmlSetStructuredErrorFunc(NULL, NULL); @@ -269,6 +268,7 @@ from_document(int argc, VALUE *argv, VALUE klass) return Qnil; } + void noko_init_xml_schema() { @@ -276,9 +276,9 @@ noko_init_xml_schema() rb_undef_alloc_func(cNokogiriXmlSchema); - rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1); - rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1); + rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", rb_xml_schema_s_read_memory, -1); + rb_define_singleton_method(cNokogiriXmlSchema, "from_document", rb_xml_schema_s_from_document, -1); - rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1); - rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_document", rb_xml_schema_validate_document, 1); + rb_define_private_method(cNokogiriXmlSchema, "validate_file", rb_xml_schema_validate_file, 1); } diff --git a/lib/nokogiri/xml/relax_ng.rb b/lib/nokogiri/xml/relax_ng.rb index 631dac29e3..2556d91769 100644 --- a/lib/nokogiri/xml/relax_ng.rb +++ b/lib/nokogiri/xml/relax_ng.rb @@ -2,36 +2,44 @@ module Nokogiri module XML class << self - ### - # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+. - # See Nokogiri::XML::RelaxNG for an example. - def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) - RelaxNG.new(string_or_io, options) + # Parse a RELAX NG schema definition and create a new {Schema} object. This is a convenience + # method for {Nokogiri::XML::RelaxNG.new}. + # @see Nokogiri::XML::RelaxNG.new + # + # @param input [String, IO] RELAX NG schema definition + # @param parse_options [Nokogiri::XML::ParseOptions] + # @return [Nokogiri::XML::RelaxNG] + def RelaxNG(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + RelaxNG.new(input, parse_options) end end ### - # Nokogiri::XML::RelaxNG is used for validating XML against a - # RelaxNG schema. + # Nokogiri::XML::RelaxNG is used for validating XML against a RELAX NG schema definition. # - # == Synopsis + # @example Determine whether an XML document is valid. + # schema = Nokogiri::XML::RelaxNG(File.read(RELAX_NG_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # schema.valid?(doc) # Boolean # - # Validate an XML document against a RelaxNG schema. Loop over the errors - # that are returned and print them out: + # @example Validate an XML document against a RelaxNG schema, and capture any errors that are found. + # schema = Nokogiri::XML::RelaxNG(File.open(RELAX_NG_FILE)) + # doc = Nokogiri::XML(File.open(XML_FILE)) + # errors = schema.validate(doc) # Array # - # schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE)) - # doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE)) - # - # schema.validate(doc).each do |error| - # puts error.message - # end - # - # The list of errors are Nokogiri::XML::SyntaxError objects. - # - # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the - # underlying parsing libraries to access network resources. This is counter to Nokogiri's - # "untrusted by default" security policy, but is a limitation of the underlying libraries. + # @note RELAX NG input is always treated as *trusted*, meaning that the underlying parsing + # libraries *will access network resources*. This is counter to Nokogiri's "untrusted by + # default" security policy, but is an unfortunate limitation of the underlying + # libraries. Please do not use this class for untrusted schema documents. class RelaxNG < Nokogiri::XML::Schema + # Parse a RELAX NG schema definition and create a new {Schema} object. + # + # @param input [String, IO] RELAX NG schema definition + # @param parse_options [Nokogiri::XML::ParseOptions] + # @return [Nokogiri::XML::RelaxNG] + def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML(input), parse_options) + end end end end diff --git a/lib/nokogiri/xml/schema.rb b/lib/nokogiri/xml/schema.rb index 904cf704f2..738095d621 100644 --- a/lib/nokogiri/xml/schema.rb +++ b/lib/nokogiri/xml/schema.rb @@ -2,70 +2,88 @@ module Nokogiri module XML class << self - ### - # Create a new Nokogiri::XML::Schema object using a +string_or_io+ - # object. - def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) - Schema.new(string_or_io, options) + # Parse an XSD schema definition and create a new {Schema} object. This is a convenience + # method for {Nokogiri::XML::Schema.new}. + # @see Nokogiri::XML::Schema.new + # + # @param input [String, IO] XSD schema definition + # @param parse_options [Nokogiri::XML::ParseOptions] + # @return [Nokogiri::XML::Schema] + def Schema(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + Schema.new(input, parse_options) end end - ### - # Nokogiri::XML::Schema is used for validating XML against a schema - # (usually from an xsd file). + # Nokogiri::XML::Schema is used for validating XML against an XSD schema definition. # - # == Synopsis + # @example Determine whether an XML document is valid. + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # schema.valid?(doc) # Boolean # - # Validate an XML document against a Schema. Loop over the errors that - # are returned and print them out: + # @example Validate an XML document against a Schema, and capture any errors that are found. + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # doc = Nokogiri::XML(File.read(XML_FILE)) + # errors = schema.validate(doc) # Array # - # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) - # doc = Nokogiri::XML(File.read(PO_XML_FILE)) + # @note As of v1.11.0, {Schema} treats inputs as *untrusted* by default, and so external + # entities are not resolved from the network (+http://+ or +ftp://+). When parsing a + # trusted document, the caller may turn off the +NONET+ option via the {ParseOptions} to + # (re-)enable external entity resolution over a network connection. # - # xsd.validate(doc).each do |error| - # puts error.message - # end - # - # The list of errors are Nokogiri::XML::SyntaxError objects. - # - # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities - # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated - # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default" - # security policy. If a document is trusted, then the caller may turn off the NONET option via - # the ParseOptions to re-enable external entity resolution over a network connection. + # Previously, documents were "trusted" by default during schema parsing which was counter + # to Nokogiri's "untrusted by default" security policy. class Schema - # Errors while parsing the schema file + # Array of {SyntaxError}s found when parsing the XSD attr_accessor :errors - # The Nokogiri::XML::ParseOptions used to parse the schema + # The {Nokogiri::XML::ParseOptions} used to parse the schema attr_accessor :parse_options - ### - # Create a new Nokogiri::XML::Schema object using a +string_or_io+ - # object. - def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA - from_document(Nokogiri::XML(string_or_io), options) + # Parse an XSD schema definition and create a new {Schema} object. + # + # @param input [String, IO] XSD schema definition + # @param parse_options [Nokogiri::XML::ParseOptions] + # @return [Nokogiri::XML::Schema] + def self.new(input, parse_options = ParseOptions::DEFAULT_SCHEMA) + from_document(Nokogiri::XML(input), parse_options) end - ### - # Validate +thing+ against this schema. +thing+ can be a - # Nokogiri::XML::Document object, or a filename. An Array of - # Nokogiri::XML::SyntaxError objects found while validating the - # +thing+ is returned. - def validate thing - if thing.is_a?(Nokogiri::XML::Document) - validate_document(thing) - elsif File.file?(thing) - validate_file(thing) + # Validate +input+ and return any errors that are found. + # + # @param input [Nokogiri::XML::Document, String] A parsed document, or a string containing a local filename. + # @return [Array] + # + # @example Validate an existing XML::Document +document+, and capture any errors that are found. + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # errors = schema.validate(document) + # + # @example Validate an XML document on disk, and capture any errors that are found. + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # errors = schema.validate("/path/to/file.xml") + def validate(input) + if input.is_a?(Nokogiri::XML::Document) + validate_document(input) + elsif File.file?(input) + validate_file(input) else - raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file" + raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file" end end - ### - # Returns true if +thing+ is a valid Nokogiri::XML::Document or - # file. - def valid? thing - validate(thing).length == 0 + # Validate +input+ and return a Boolean indicating whether the document is valid + # + # @param input [Nokogiri::XML::Document, String] A parsed document, or a string containing a local filename. + # @return [Boolean] + # + # @example Validate an existing XML::Document +document+ + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # schema.valid?(document) + # + # @example Validate an XML document on disk + # schema = Nokogiri::XML::Schema(File.read(XSD_FILE)) + # schema.valid?("/path/to/file.xml") + def valid?(input) + validate(input).length == 0 end end end