diff --git a/lib/parse.js b/lib/parse.js index 76f7cf6..731161e 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -2,7 +2,7 @@ * Module dependencies. */ -var DOMParser = require('xmldom').DOMParser; +var DOMParser = require('./xmldom/dom-parser').DOMParser; /** * Module exports. diff --git a/lib/xmldom/dom-parser.js b/lib/xmldom/dom-parser.js new file mode 100644 index 0000000..2ad090d --- /dev/null +++ b/lib/xmldom/dom-parser.js @@ -0,0 +1,253 @@ +function DOMParser(options){ + this.options = options ||{locator:{}}; +} + +DOMParser.prototype.parseFromString = function(source,mimeType){ + var options = this.options; + var sax = new XMLReader(); + var domBuilder = options.domBuilder || new DOMHandler();//contentHandler and LexicalHandler + var errorHandler = options.errorHandler; + var locator = options.locator; + var defaultNSMap = options.xmlns||{}; + var isHTML = /\/x?html?$/.test(mimeType);//mimeType.toLowerCase().indexOf('html') > -1; + var entityMap = isHTML?htmlEntity.entityMap:{'lt':'<','gt':'>','amp':'&','quot':'"','apos':"'"}; + if(locator){ + domBuilder.setDocumentLocator(locator) + } + + sax.errorHandler = buildErrorHandler(errorHandler,domBuilder,locator); + sax.domBuilder = options.domBuilder || domBuilder; + if(isHTML){ + defaultNSMap['']= 'http://www.w3.org/1999/xhtml'; + } + defaultNSMap.xml = defaultNSMap.xml || 'http://www.w3.org/XML/1998/namespace'; + if(source && typeof source === 'string'){ + sax.parse(source,defaultNSMap,entityMap); + }else{ + sax.errorHandler.error("invalid doc source"); + } + return domBuilder.doc; +} +function buildErrorHandler(errorImpl,domBuilder,locator){ + if(!errorImpl){ + if(domBuilder instanceof DOMHandler){ + return domBuilder; + } + errorImpl = domBuilder ; + } + var errorHandler = {} + var isCallback = errorImpl instanceof Function; + locator = locator||{} + function build(key){ + var fn = errorImpl[key]; + if(!fn && isCallback){ + fn = errorImpl.length == 2?function(msg){errorImpl(key,msg)}:errorImpl; + } + errorHandler[key] = fn && function(msg){ + fn('[xmldom '+key+']\t'+msg+_locator(locator)); + }||function(){}; + } + build('warning'); + build('error'); + build('fatalError'); + return errorHandler; +} + +//console.log('#\n\n\n\n\n\n\n####') +/** + * +ContentHandler+ErrorHandler + * +LexicalHandler+EntityResolver2 + * -DeclHandler-DTDHandler + * + * DefaultHandler:EntityResolver, DTDHandler, ContentHandler, ErrorHandler + * DefaultHandler2:DefaultHandler,LexicalHandler, DeclHandler, EntityResolver2 + * @link http://www.saxproject.org/apidoc/org/xml/sax/helpers/DefaultHandler.html + */ +function DOMHandler() { + this.cdata = false; +} +function position(locator,node){ + node.lineNumber = locator.lineNumber; + node.columnNumber = locator.columnNumber; +} +/** + * @see org.xml.sax.ContentHandler#startDocument + * @link http://www.saxproject.org/apidoc/org/xml/sax/ContentHandler.html + */ +DOMHandler.prototype = { + startDocument : function() { + this.doc = new DOMImplementation().createDocument(null, null, null); + if (this.locator) { + this.doc.documentURI = this.locator.systemId; + } + }, + startElement:function(namespaceURI, localName, qName, attrs) { + var doc = this.doc; + var el = doc.createElementNS(namespaceURI, qName||localName); + var len = attrs.length; + appendElement(this, el); + this.currentElement = el; + + this.locator && position(this.locator,el) + for (var i = 0 ; i < len; i++) { + var namespaceURI = attrs.getURI(i); + var value = attrs.getValue(i); + var qName = attrs.getQName(i); + var attr = doc.createAttributeNS(namespaceURI, qName); + this.locator &&position(attrs.getLocator(i),attr); + attr.value = attr.nodeValue = value; + el.setAttributeNode(attr) + } + }, + endElement:function(namespaceURI, localName, qName) { + var current = this.currentElement + var tagName = current.tagName; + this.currentElement = current.parentNode; + }, + startPrefixMapping:function(prefix, uri) { + }, + endPrefixMapping:function(prefix) { + }, + processingInstruction:function(target, data) { + var ins = this.doc.createProcessingInstruction(target, data); + this.locator && position(this.locator,ins) + appendElement(this, ins); + }, + ignorableWhitespace:function(ch, start, length) { + }, + characters:function(chars, start, length) { + chars = _toString.apply(this,arguments) + //console.log(chars) + if(chars){ + if (this.cdata) { + var charNode = this.doc.createCDATASection(chars); + } else { + var charNode = this.doc.createTextNode(chars); + } + if(this.currentElement){ + this.currentElement.appendChild(charNode); + }else if(/^\s*$/.test(chars)){ + this.doc.appendChild(charNode); + //process xml + } + this.locator && position(this.locator,charNode) + } + }, + skippedEntity:function(name) { + }, + endDocument:function() { + this.doc.normalize(); + }, + setDocumentLocator:function (locator) { + if(this.locator = locator){// && !('lineNumber' in locator)){ + locator.lineNumber = 0; + } + }, + //LexicalHandler + comment:function(chars, start, length) { + chars = _toString.apply(this,arguments) + var comm = this.doc.createComment(chars); + this.locator && position(this.locator,comm) + appendElement(this, comm); + }, + + startCDATA:function() { + //used in characters() methods + this.cdata = true; + }, + endCDATA:function() { + this.cdata = false; + }, + + startDTD:function(name, publicId, systemId) { + var impl = this.doc.implementation; + if (impl && impl.createDocumentType) { + var dt = impl.createDocumentType(name, publicId, systemId); + this.locator && position(this.locator,dt) + appendElement(this, dt); + } + }, + /** + * @see org.xml.sax.ErrorHandler + * @link http://www.saxproject.org/apidoc/org/xml/sax/ErrorHandler.html + */ + warning:function(error) { + console.warn('[xmldom warning]\t'+error,_locator(this.locator)); + }, + error:function(error) { + console.error('[xmldom error]\t'+error,_locator(this.locator)); + }, + fatalError:function(error) { + throw new ParseError(error, this.locator); + } +} +function _locator(l){ + if(l){ + return '\n@'+(l.systemId ||'')+'#[line:'+l.lineNumber+',col:'+l.columnNumber+']' + } +} +function _toString(chars,start,length){ + if(typeof chars == 'string'){ + return chars.substr(start,length) + }else{//java sax connect width xmldom on rhino(what about: "? && !(chars instanceof String)") + if(chars.length >= start+length || start){ + return new java.lang.String(chars,start,length)+''; + } + return chars; + } +} + +/* + * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/LexicalHandler.html + * used method of org.xml.sax.ext.LexicalHandler: + * #comment(chars, start, length) + * #startCDATA() + * #endCDATA() + * #startDTD(name, publicId, systemId) + * + * + * IGNORED method of org.xml.sax.ext.LexicalHandler: + * #endDTD() + * #startEntity(name) + * #endEntity(name) + * + * + * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/DeclHandler.html + * IGNORED method of org.xml.sax.ext.DeclHandler + * #attributeDecl(eName, aName, type, mode, value) + * #elementDecl(name, model) + * #externalEntityDecl(name, publicId, systemId) + * #internalEntityDecl(name, value) + * @link http://www.saxproject.org/apidoc/org/xml/sax/ext/EntityResolver2.html + * IGNORED method of org.xml.sax.EntityResolver2 + * #resolveEntity(String name,String publicId,String baseURI,String systemId) + * #resolveEntity(publicId, systemId) + * #getExternalSubset(name, baseURI) + * @link http://www.saxproject.org/apidoc/org/xml/sax/DTDHandler.html + * IGNORED method of org.xml.sax.DTDHandler + * #notationDecl(name, publicId, systemId) {}; + * #unparsedEntityDecl(name, publicId, systemId, notationName) {}; + */ +"endDTD,startEntity,endEntity,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,resolveEntity,getExternalSubset,notationDecl,unparsedEntityDecl".replace(/\w+/g,function(key){ + DOMHandler.prototype[key] = function(){return null} +}) + +/* Private static helpers treated below as private instance methods, so don't need to add these to the public API; we might use a Relator to also get rid of non-standard public properties */ +function appendElement (hander,node) { + if (!hander.currentElement) { + hander.doc.appendChild(node); + } else { + hander.currentElement.appendChild(node); + } +}//appendChild and setAttributeNS are preformance key + +//if(typeof require == 'function'){ +var htmlEntity = require('./entities'); +var sax = require('./sax'); +var XMLReader = sax.XMLReader; +var ParseError = sax.ParseError; +var DOMImplementation = exports.DOMImplementation = require('./dom').DOMImplementation; +exports.XMLSerializer = require('./dom').XMLSerializer ; +exports.DOMParser = DOMParser; +exports.__DOMHandler = DOMHandler; +//} diff --git a/lib/xmldom/dom.js b/lib/xmldom/dom.js new file mode 100644 index 0000000..12288c8 --- /dev/null +++ b/lib/xmldom/dom.js @@ -0,0 +1,1283 @@ +function copy(src,dest){ + for(var p in src){ + dest[p] = src[p]; + } +} +/** +^\w+\.prototype\.([_\w]+)\s*=\s*((?:.*\{\s*?[\r\n][\s\S]*?^})|\S.*?(?=[;\r\n]));? +^\w+\.prototype\.([_\w]+)\s*=\s*(\S.*?(?=[;\r\n]));? + */ +function _extends(Class,Super){ + var pt = Class.prototype; + if(!(pt instanceof Super)){ + function t(){}; + t.prototype = Super.prototype; + t = new t(); + copy(pt,t); + Class.prototype = pt = t; + } + if(pt.constructor != Class){ + if(typeof Class != 'function'){ + console.error("unknow Class:"+Class) + } + pt.constructor = Class + } +} +var htmlns = 'http://www.w3.org/1999/xhtml' ; +// Node Types +var NodeType = {} +var ELEMENT_NODE = NodeType.ELEMENT_NODE = 1; +var ATTRIBUTE_NODE = NodeType.ATTRIBUTE_NODE = 2; +var TEXT_NODE = NodeType.TEXT_NODE = 3; +var CDATA_SECTION_NODE = NodeType.CDATA_SECTION_NODE = 4; +var ENTITY_REFERENCE_NODE = NodeType.ENTITY_REFERENCE_NODE = 5; +var ENTITY_NODE = NodeType.ENTITY_NODE = 6; +var PROCESSING_INSTRUCTION_NODE = NodeType.PROCESSING_INSTRUCTION_NODE = 7; +var COMMENT_NODE = NodeType.COMMENT_NODE = 8; +var DOCUMENT_NODE = NodeType.DOCUMENT_NODE = 9; +var DOCUMENT_TYPE_NODE = NodeType.DOCUMENT_TYPE_NODE = 10; +var DOCUMENT_FRAGMENT_NODE = NodeType.DOCUMENT_FRAGMENT_NODE = 11; +var NOTATION_NODE = NodeType.NOTATION_NODE = 12; + +// ExceptionCode +var ExceptionCode = {} +var ExceptionMessage = {}; +var INDEX_SIZE_ERR = ExceptionCode.INDEX_SIZE_ERR = ((ExceptionMessage[1]="Index size error"),1); +var DOMSTRING_SIZE_ERR = ExceptionCode.DOMSTRING_SIZE_ERR = ((ExceptionMessage[2]="DOMString size error"),2); +var HIERARCHY_REQUEST_ERR = ExceptionCode.HIERARCHY_REQUEST_ERR = ((ExceptionMessage[3]="Hierarchy request error"),3); +var WRONG_DOCUMENT_ERR = ExceptionCode.WRONG_DOCUMENT_ERR = ((ExceptionMessage[4]="Wrong document"),4); +var INVALID_CHARACTER_ERR = ExceptionCode.INVALID_CHARACTER_ERR = ((ExceptionMessage[5]="Invalid character"),5); +var NO_DATA_ALLOWED_ERR = ExceptionCode.NO_DATA_ALLOWED_ERR = ((ExceptionMessage[6]="No data allowed"),6); +var NO_MODIFICATION_ALLOWED_ERR = ExceptionCode.NO_MODIFICATION_ALLOWED_ERR = ((ExceptionMessage[7]="No modification allowed"),7); +var NOT_FOUND_ERR = ExceptionCode.NOT_FOUND_ERR = ((ExceptionMessage[8]="Not found"),8); +var NOT_SUPPORTED_ERR = ExceptionCode.NOT_SUPPORTED_ERR = ((ExceptionMessage[9]="Not supported"),9); +var INUSE_ATTRIBUTE_ERR = ExceptionCode.INUSE_ATTRIBUTE_ERR = ((ExceptionMessage[10]="Attribute in use"),10); +//level2 +var INVALID_STATE_ERR = ExceptionCode.INVALID_STATE_ERR = ((ExceptionMessage[11]="Invalid state"),11); +var SYNTAX_ERR = ExceptionCode.SYNTAX_ERR = ((ExceptionMessage[12]="Syntax error"),12); +var INVALID_MODIFICATION_ERR = ExceptionCode.INVALID_MODIFICATION_ERR = ((ExceptionMessage[13]="Invalid modification"),13); +var NAMESPACE_ERR = ExceptionCode.NAMESPACE_ERR = ((ExceptionMessage[14]="Invalid namespace"),14); +var INVALID_ACCESS_ERR = ExceptionCode.INVALID_ACCESS_ERR = ((ExceptionMessage[15]="Invalid access"),15); + +/** + * DOM Level 2 + * Object DOMException + * @see http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/ecma-script-binding.html + * @see http://www.w3.org/TR/REC-DOM-Level-1/ecma-script-language-binding.html + */ +function DOMException(code, message) { + if(message instanceof Error){ + var error = message; + }else{ + error = this; + Error.call(this, ExceptionMessage[code]); + this.message = ExceptionMessage[code]; + if(Error.captureStackTrace) Error.captureStackTrace(this, DOMException); + } + error.code = code; + if(message) this.message = this.message + ": " + message; + return error; +}; +DOMException.prototype = Error.prototype; +copy(ExceptionCode,DOMException) +/** + * @see http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-536297177 + * The NodeList interface provides the abstraction of an ordered collection of nodes, without defining or constraining how this collection is implemented. NodeList objects in the DOM are live. + * The items in the NodeList are accessible via an integral index, starting from 0. + */ +function NodeList() { +}; +NodeList.prototype = { + /** + * The number of nodes in the list. The range of valid child node indices is 0 to length-1 inclusive. + * @standard level1 + */ + length:0, + /** + * Returns the indexth item in the collection. If index is greater than or equal to the number of nodes in the list, this returns null. + * @standard level1 + * @param index unsigned long + * Index into the collection. + * @return Node + * The node at the indexth position in the NodeList, or null if that is not a valid index. + */ + item: function(index) { + return this[index] || null; + }, + toString:function(isHTML,nodeFilter){ + for(var buf = [], i = 0;i=0){ + var lastIndex = list.length-1 + while(i0 || key == 'xmlns'){ +// return null; +// } + //console.log() + var i = this.length; + while(i--){ + var attr = this[i]; + //console.log(attr.nodeName,key) + if(attr.nodeName == key){ + return attr; + } + } + }, + setNamedItem: function(attr) { + var el = attr.ownerElement; + if(el && el!=this._ownerElement){ + throw new DOMException(INUSE_ATTRIBUTE_ERR); + } + var oldAttr = this.getNamedItem(attr.nodeName); + _addNamedNode(this._ownerElement,this,attr,oldAttr); + return oldAttr; + }, + /* returns Node */ + setNamedItemNS: function(attr) {// raises: WRONG_DOCUMENT_ERR,NO_MODIFICATION_ALLOWED_ERR,INUSE_ATTRIBUTE_ERR + var el = attr.ownerElement, oldAttr; + if(el && el!=this._ownerElement){ + throw new DOMException(INUSE_ATTRIBUTE_ERR); + } + oldAttr = this.getNamedItemNS(attr.namespaceURI,attr.localName); + _addNamedNode(this._ownerElement,this,attr,oldAttr); + return oldAttr; + }, + + /* returns Node */ + removeNamedItem: function(key) { + var attr = this.getNamedItem(key); + _removeNamedNode(this._ownerElement,this,attr); + return attr; + + + },// raises: NOT_FOUND_ERR,NO_MODIFICATION_ALLOWED_ERR + + //for level2 + removeNamedItemNS:function(namespaceURI,localName){ + var attr = this.getNamedItemNS(namespaceURI,localName); + _removeNamedNode(this._ownerElement,this,attr); + return attr; + }, + getNamedItemNS: function(namespaceURI, localName) { + var i = this.length; + while(i--){ + var node = this[i]; + if(node.localName == localName && node.namespaceURI == namespaceURI){ + return node; + } + } + return null; + } +}; +/** + * @see http://www.w3.org/TR/REC-DOM-Level-1/level-one-core.html#ID-102161490 + */ +function DOMImplementation(/* Object */ features) { + this._features = {}; + if (features) { + for (var feature in features) { + this._features = features[feature]; + } + } +}; + +DOMImplementation.prototype = { + hasFeature: function(/* string */ feature, /* string */ version) { + var versions = this._features[feature.toLowerCase()]; + if (versions && (!version || version in versions)) { + return true; + } else { + return false; + } + }, + // Introduced in DOM Level 2: + createDocument:function(namespaceURI, qualifiedName, doctype){// raises:INVALID_CHARACTER_ERR,NAMESPACE_ERR,WRONG_DOCUMENT_ERR + var doc = new Document(); + doc.implementation = this; + doc.childNodes = new NodeList(); + doc.doctype = doctype; + if(doctype){ + doc.appendChild(doctype); + } + if(qualifiedName){ + var root = doc.createElementNS(namespaceURI,qualifiedName); + doc.appendChild(root); + } + return doc; + }, + // Introduced in DOM Level 2: + createDocumentType:function(qualifiedName, publicId, systemId){// raises:INVALID_CHARACTER_ERR,NAMESPACE_ERR + var node = new DocumentType(); + node.name = qualifiedName; + node.nodeName = qualifiedName; + node.publicId = publicId; + node.systemId = systemId; + // Introduced in DOM Level 2: + //readonly attribute DOMString internalSubset; + + //TODO:.. + // readonly attribute NamedNodeMap entities; + // readonly attribute NamedNodeMap notations; + return node; + } +}; + + +/** + * @see http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html#ID-1950641247 + */ + +function Node() { +}; + +Node.prototype = { + firstChild : null, + lastChild : null, + previousSibling : null, + nextSibling : null, + attributes : null, + parentNode : null, + childNodes : null, + ownerDocument : null, + nodeValue : null, + namespaceURI : null, + prefix : null, + localName : null, + // Modified in DOM Level 2: + insertBefore:function(newChild, refChild){//raises + return _insertBefore(this,newChild,refChild); + }, + replaceChild:function(newChild, oldChild){//raises + this.insertBefore(newChild,oldChild); + if(oldChild){ + this.removeChild(oldChild); + } + }, + removeChild:function(oldChild){ + return _removeChild(this,oldChild); + }, + appendChild:function(newChild){ + return this.insertBefore(newChild,null); + }, + hasChildNodes:function(){ + return this.firstChild != null; + }, + cloneNode:function(deep){ + return cloneNode(this.ownerDocument||this,this,deep); + }, + // Modified in DOM Level 2: + normalize:function(){ + var child = this.firstChild; + while(child){ + var next = child.nextSibling; + if(next && next.nodeType == TEXT_NODE && child.nodeType == TEXT_NODE){ + this.removeChild(next); + child.appendData(next.data); + }else{ + child.normalize(); + child = next; + } + } + }, + // Introduced in DOM Level 2: + isSupported:function(feature, version){ + return this.ownerDocument.implementation.hasFeature(feature,version); + }, + // Introduced in DOM Level 2: + hasAttributes:function(){ + return this.attributes.length>0; + }, + lookupPrefix:function(namespaceURI){ + var el = this; + while(el){ + var map = el._nsMap; + //console.dir(map) + if(map){ + for(var n in map){ + if(map[n] == namespaceURI){ + return n; + } + } + } + el = el.nodeType == ATTRIBUTE_NODE?el.ownerDocument : el.parentNode; + } + return null; + }, + // Introduced in DOM Level 3: + lookupNamespaceURI:function(prefix){ + var el = this; + while(el){ + var map = el._nsMap; + //console.dir(map) + if(map){ + if(prefix in map){ + return map[prefix] ; + } + } + el = el.nodeType == ATTRIBUTE_NODE?el.ownerDocument : el.parentNode; + } + return null; + }, + // Introduced in DOM Level 3: + isDefaultNamespace:function(namespaceURI){ + var prefix = this.lookupPrefix(namespaceURI); + return prefix == null; + } +}; + + +function _xmlEncoder(c){ + return c == '<' && '<' || + c == '>' && '>' || + c == '&' && '&' || + c == '"' && '"' || + '&#'+c.charCodeAt()+';' +} + + +copy(NodeType,Node); +copy(NodeType,Node.prototype); + +/** + * @param callback return true for continue,false for break + * @return boolean true: break visit; + */ +function _visitNode(node,callback){ + if(callback(node)){ + return true; + } + if(node = node.firstChild){ + do{ + if(_visitNode(node,callback)){return true} + }while(node=node.nextSibling) + } +} + + + +function Document(){ +} +function _onAddAttribute(doc,el,newAttr){ + doc && doc._inc++; + var ns = newAttr.namespaceURI ; + if(ns == 'http://www.w3.org/2000/xmlns/'){ + //update namespace + el._nsMap[newAttr.prefix?newAttr.localName:''] = newAttr.value + } +} +function _onRemoveAttribute(doc,el,newAttr,remove){ + doc && doc._inc++; + var ns = newAttr.namespaceURI ; + if(ns == 'http://www.w3.org/2000/xmlns/'){ + //update namespace + delete el._nsMap[newAttr.prefix?newAttr.localName:''] + } +} +function _onUpdateChild(doc,el,newChild){ + if(doc && doc._inc){ + doc._inc++; + //update childNodes + var cs = el.childNodes; + if(newChild){ + cs[cs.length++] = newChild; + }else{ + //console.log(1) + var child = el.firstChild; + var i = 0; + while(child){ + cs[i++] = child; + child =child.nextSibling; + } + cs.length = i; + } + } +} + +/** + * attributes; + * children; + * + * writeable properties: + * nodeValue,Attr:value,CharacterData:data + * prefix + */ +function _removeChild(parentNode,child){ + var previous = child.previousSibling; + var next = child.nextSibling; + if(previous){ + previous.nextSibling = next; + }else{ + parentNode.firstChild = next + } + if(next){ + next.previousSibling = previous; + }else{ + parentNode.lastChild = previous; + } + _onUpdateChild(parentNode.ownerDocument,parentNode); + return child; +} +/** + * preformance key(refChild == null) + */ +function _insertBefore(parentNode,newChild,nextChild){ + var cp = newChild.parentNode; + if(cp){ + cp.removeChild(newChild);//remove and update + } + if(newChild.nodeType === DOCUMENT_FRAGMENT_NODE){ + var newFirst = newChild.firstChild; + if (newFirst == null) { + return newChild; + } + var newLast = newChild.lastChild; + }else{ + newFirst = newLast = newChild; + } + var pre = nextChild ? nextChild.previousSibling : parentNode.lastChild; + + newFirst.previousSibling = pre; + newLast.nextSibling = nextChild; + + + if(pre){ + pre.nextSibling = newFirst; + }else{ + parentNode.firstChild = newFirst; + } + if(nextChild == null){ + parentNode.lastChild = newLast; + }else{ + nextChild.previousSibling = newLast; + } + do{ + newFirst.parentNode = parentNode; + }while(newFirst !== newLast && (newFirst= newFirst.nextSibling)) + _onUpdateChild(parentNode.ownerDocument||parentNode,parentNode); + //console.log(parentNode.lastChild.nextSibling == null) + if (newChild.nodeType == DOCUMENT_FRAGMENT_NODE) { + newChild.firstChild = newChild.lastChild = null; + } + return newChild; +} +function _appendSingleChild(parentNode,newChild){ + var cp = newChild.parentNode; + if(cp){ + var pre = parentNode.lastChild; + cp.removeChild(newChild);//remove and update + var pre = parentNode.lastChild; + } + var pre = parentNode.lastChild; + newChild.parentNode = parentNode; + newChild.previousSibling = pre; + newChild.nextSibling = null; + if(pre){ + pre.nextSibling = newChild; + }else{ + parentNode.firstChild = newChild; + } + parentNode.lastChild = newChild; + _onUpdateChild(parentNode.ownerDocument,parentNode,newChild); + return newChild; + //console.log("__aa",parentNode.lastChild.nextSibling == null) +} +Document.prototype = { + //implementation : null, + nodeName : '#document', + nodeType : DOCUMENT_NODE, + doctype : null, + documentElement : null, + _inc : 1, + + insertBefore : function(newChild, refChild){//raises + if(newChild.nodeType == DOCUMENT_FRAGMENT_NODE){ + var child = newChild.firstChild; + while(child){ + var next = child.nextSibling; + this.insertBefore(child,refChild); + child = next; + } + return newChild; + } + if(this.documentElement == null && newChild.nodeType == ELEMENT_NODE){ + this.documentElement = newChild; + } + + return _insertBefore(this,newChild,refChild),(newChild.ownerDocument = this),newChild; + }, + removeChild : function(oldChild){ + if(this.documentElement == oldChild){ + this.documentElement = null; + } + return _removeChild(this,oldChild); + }, + // Introduced in DOM Level 2: + importNode : function(importedNode,deep){ + return importNode(this,importedNode,deep); + }, + // Introduced in DOM Level 2: + getElementById : function(id){ + var rtv = null; + _visitNode(this.documentElement,function(node){ + if(node.nodeType == ELEMENT_NODE){ + if(node.getAttribute('id') == id){ + rtv = node; + return true; + } + } + }) + return rtv; + }, + + getElementsByClassName: function(className) { + var pattern = new RegExp("(^|\\s)" + className + "(\\s|$)"); + return new LiveNodeList(this, function(base) { + var ls = []; + _visitNode(base.documentElement, function(node) { + if(node !== base && node.nodeType == ELEMENT_NODE) { + if(pattern.test(node.getAttribute('class'))) { + ls.push(node); + } + } + }); + return ls; + }); + }, + + //document factory method: + createElement : function(tagName){ + var node = new Element(); + node.ownerDocument = this; + node.nodeName = tagName; + node.tagName = tagName; + node.childNodes = new NodeList(); + var attrs = node.attributes = new NamedNodeMap(); + attrs._ownerElement = node; + return node; + }, + createDocumentFragment : function(){ + var node = new DocumentFragment(); + node.ownerDocument = this; + node.childNodes = new NodeList(); + return node; + }, + createTextNode : function(data){ + var node = new Text(); + node.ownerDocument = this; + node.appendData(data) + return node; + }, + createComment : function(data){ + var node = new Comment(); + node.ownerDocument = this; + node.appendData(data) + return node; + }, + createCDATASection : function(data){ + var node = new CDATASection(); + node.ownerDocument = this; + node.appendData(data) + return node; + }, + createProcessingInstruction : function(target,data){ + var node = new ProcessingInstruction(); + node.ownerDocument = this; + node.tagName = node.target = target; + node.nodeValue= node.data = data; + return node; + }, + createAttribute : function(name){ + var node = new Attr(); + node.ownerDocument = this; + node.name = name; + node.nodeName = name; + node.localName = name; + node.specified = true; + return node; + }, + createEntityReference : function(name){ + var node = new EntityReference(); + node.ownerDocument = this; + node.nodeName = name; + return node; + }, + // Introduced in DOM Level 2: + createElementNS : function(namespaceURI,qualifiedName){ + var node = new Element(); + var pl = qualifiedName.split(':'); + var attrs = node.attributes = new NamedNodeMap(); + node.childNodes = new NodeList(); + node.ownerDocument = this; + node.nodeName = qualifiedName; + node.tagName = qualifiedName; + node.namespaceURI = namespaceURI; + if(pl.length == 2){ + node.prefix = pl[0]; + node.localName = pl[1]; + }else{ + //el.prefix = null; + node.localName = qualifiedName; + } + attrs._ownerElement = node; + return node; + }, + // Introduced in DOM Level 2: + createAttributeNS : function(namespaceURI,qualifiedName){ + var node = new Attr(); + var pl = qualifiedName.split(':'); + node.ownerDocument = this; + node.nodeName = qualifiedName; + node.name = qualifiedName; + node.namespaceURI = namespaceURI; + node.specified = true; + if(pl.length == 2){ + node.prefix = pl[0]; + node.localName = pl[1]; + }else{ + //el.prefix = null; + node.localName = qualifiedName; + } + return node; + } +}; +_extends(Document,Node); + + +function Element() { + this._nsMap = {}; +}; +Element.prototype = { + nodeType : ELEMENT_NODE, + hasAttribute : function(name){ + return this.getAttributeNode(name)!=null; + }, + getAttribute : function(name){ + var attr = this.getAttributeNode(name); + return attr && attr.value || ''; + }, + getAttributeNode : function(name){ + return this.attributes.getNamedItem(name); + }, + setAttribute : function(name, value){ + var attr = this.ownerDocument.createAttribute(name); + attr.value = attr.nodeValue = "" + value; + this.setAttributeNode(attr) + }, + removeAttribute : function(name){ + var attr = this.getAttributeNode(name) + attr && this.removeAttributeNode(attr); + }, + + //four real opeartion method + appendChild:function(newChild){ + if(newChild.nodeType === DOCUMENT_FRAGMENT_NODE){ + return this.insertBefore(newChild,null); + }else{ + return _appendSingleChild(this,newChild); + } + }, + setAttributeNode : function(newAttr){ + return this.attributes.setNamedItem(newAttr); + }, + setAttributeNodeNS : function(newAttr){ + return this.attributes.setNamedItemNS(newAttr); + }, + removeAttributeNode : function(oldAttr){ + //console.log(this == oldAttr.ownerElement) + return this.attributes.removeNamedItem(oldAttr.nodeName); + }, + //get real attribute name,and remove it by removeAttributeNode + removeAttributeNS : function(namespaceURI, localName){ + var old = this.getAttributeNodeNS(namespaceURI, localName); + old && this.removeAttributeNode(old); + }, + + hasAttributeNS : function(namespaceURI, localName){ + return this.getAttributeNodeNS(namespaceURI, localName)!=null; + }, + getAttributeNS : function(namespaceURI, localName){ + var attr = this.getAttributeNodeNS(namespaceURI, localName); + return attr && attr.value || ''; + }, + setAttributeNS : function(namespaceURI, qualifiedName, value){ + var attr = this.ownerDocument.createAttributeNS(namespaceURI, qualifiedName); + attr.value = attr.nodeValue = "" + value; + this.setAttributeNode(attr) + }, + getAttributeNodeNS : function(namespaceURI, localName){ + return this.attributes.getNamedItemNS(namespaceURI, localName); + }, + + getElementsByTagName : function(tagName){ + return new LiveNodeList(this,function(base){ + var ls = []; + _visitNode(base,function(node){ + if(node !== base && node.nodeType == ELEMENT_NODE && (tagName === '*' || node.tagName == tagName)){ + ls.push(node); + } + }); + return ls; + }); + }, + getElementsByTagNameNS : function(namespaceURI, localName){ + return new LiveNodeList(this,function(base){ + var ls = []; + _visitNode(base,function(node){ + if(node !== base && node.nodeType === ELEMENT_NODE && (namespaceURI === '*' || node.namespaceURI === namespaceURI) && (localName === '*' || node.localName == localName)){ + ls.push(node); + } + }); + return ls; + + }); + } +}; +Document.prototype.getElementsByTagName = Element.prototype.getElementsByTagName; +Document.prototype.getElementsByTagNameNS = Element.prototype.getElementsByTagNameNS; + + +_extends(Element,Node); +function Attr() { +}; +Attr.prototype.nodeType = ATTRIBUTE_NODE; +_extends(Attr,Node); + + +function CharacterData() { +}; +CharacterData.prototype = { + data : '', + substringData : function(offset, count) { + return this.data.substring(offset, offset+count); + }, + appendData: function(text) { + text = this.data+text; + this.nodeValue = this.data = text; + this.length = text.length; + }, + insertData: function(offset,text) { + this.replaceData(offset,0,text); + + }, + appendChild:function(newChild){ + throw new Error(ExceptionMessage[HIERARCHY_REQUEST_ERR]) + }, + deleteData: function(offset, count) { + this.replaceData(offset,count,""); + }, + replaceData: function(offset, count, text) { + var start = this.data.substring(0,offset); + var end = this.data.substring(offset+count); + text = start + text + end; + this.nodeValue = this.data = text; + this.length = text.length; + } +} +_extends(CharacterData,Node); +function Text() { +}; +Text.prototype = { + nodeName : "#text", + nodeType : TEXT_NODE, + splitText : function(offset) { + var text = this.data; + var newText = text.substring(offset); + text = text.substring(0, offset); + this.data = this.nodeValue = text; + this.length = text.length; + var newNode = this.ownerDocument.createTextNode(newText); + if(this.parentNode){ + this.parentNode.insertBefore(newNode, this.nextSibling); + } + return newNode; + } +} +_extends(Text,CharacterData); +function Comment() { +}; +Comment.prototype = { + nodeName : "#comment", + nodeType : COMMENT_NODE +} +_extends(Comment,CharacterData); + +function CDATASection() { +}; +CDATASection.prototype = { + nodeName : "#cdata-section", + nodeType : CDATA_SECTION_NODE +} +_extends(CDATASection,CharacterData); + + +function DocumentType() { +}; +DocumentType.prototype.nodeType = DOCUMENT_TYPE_NODE; +_extends(DocumentType,Node); + +function Notation() { +}; +Notation.prototype.nodeType = NOTATION_NODE; +_extends(Notation,Node); + +function Entity() { +}; +Entity.prototype.nodeType = ENTITY_NODE; +_extends(Entity,Node); + +function EntityReference() { +}; +EntityReference.prototype.nodeType = ENTITY_REFERENCE_NODE; +_extends(EntityReference,Node); + +function DocumentFragment() { +}; +DocumentFragment.prototype.nodeName = "#document-fragment"; +DocumentFragment.prototype.nodeType = DOCUMENT_FRAGMENT_NODE; +_extends(DocumentFragment,Node); + + +function ProcessingInstruction() { +} +ProcessingInstruction.prototype.nodeType = PROCESSING_INSTRUCTION_NODE; +_extends(ProcessingInstruction,Node); +function XMLSerializer(){} +XMLSerializer.prototype.serializeToString = function(node,isHtml,nodeFilter){ + return nodeSerializeToString.call(node,isHtml,nodeFilter); +} +Node.prototype.toString = nodeSerializeToString; +function nodeSerializeToString(isHtml,nodeFilter){ + var buf = []; + var refNode = this.nodeType == 9 && this.documentElement || this; + var prefix = refNode.prefix; + var uri = refNode.namespaceURI; + + if(uri && prefix == null){ + //console.log(prefix) + var prefix = refNode.lookupPrefix(uri); + if(prefix == null){ + //isHTML = true; + var visibleNamespaces=[ + {namespace:uri,prefix:null} + //{namespace:uri,prefix:''} + ] + } + } + serializeToString(this,buf,isHtml,nodeFilter,visibleNamespaces); + //console.log('###',this.nodeType,uri,prefix,buf.join('')) + return buf.join(''); +} +function needNamespaceDefine(node,isHTML, visibleNamespaces) { + var prefix = node.prefix||''; + var uri = node.namespaceURI; + if (!prefix && !uri){ + return false; + } + if (prefix === "xml" && uri === "http://www.w3.org/XML/1998/namespace" + || uri == 'http://www.w3.org/2000/xmlns/'){ + return false; + } + + var i = visibleNamespaces.length + //console.log('@@@@',node.tagName,prefix,uri,visibleNamespaces) + while (i--) { + var ns = visibleNamespaces[i]; + // get namespace prefix + //console.log(node.nodeType,node.tagName,ns.prefix,prefix) + if (ns.prefix == prefix){ + return ns.namespace != uri; + } + } + //console.log(isHTML,uri,prefix=='') + //if(isHTML && prefix ==null && uri == 'http://www.w3.org/1999/xhtml'){ + // return false; + //} + //node.flag = '11111' + //console.error(3,true,node.flag,node.prefix,node.namespaceURI) + return true; +} +function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ + if(nodeFilter){ + node = nodeFilter(node); + if(node){ + if(typeof node == 'string'){ + buf.push(node); + return; + } + }else{ + return; + } + //buf.sort.apply(attrs, attributeSorter); + } + switch(node.nodeType){ + case ELEMENT_NODE: + if (!visibleNamespaces) visibleNamespaces = []; + var startVisibleNamespaces = visibleNamespaces.length; + var attrs = node.attributes; + var len = attrs.length; + var child = node.firstChild; + var nodeName = node.tagName; + + isHTML = (htmlns === node.namespaceURI) ||isHTML + buf.push('<',nodeName); + + + + for(var i=0;i'); + //if is cdata child node + if(isHTML && /^script$/i.test(nodeName)){ + while(child){ + if(child.data){ + buf.push(child.data); + }else{ + serializeToString(child,buf,isHTML,nodeFilter,visibleNamespaces); + } + child = child.nextSibling; + } + }else + { + while(child){ + serializeToString(child,buf,isHTML,nodeFilter,visibleNamespaces); + child = child.nextSibling; + } + } + buf.push(''); + }else{ + buf.push('/>'); + } + // remove added visible namespaces + //visibleNamespaces.length = startVisibleNamespaces; + return; + case DOCUMENT_NODE: + case DOCUMENT_FRAGMENT_NODE: + var child = node.firstChild; + while(child){ + serializeToString(child,buf,isHTML,nodeFilter,visibleNamespaces); + child = child.nextSibling; + } + return; + case ATTRIBUTE_NODE: + /** + * Well-formedness constraint: No < in Attribute Values + * The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <. + * @see https://www.w3.org/TR/xml/#CleanAttrVals + * @see https://www.w3.org/TR/xml/#NT-AttValue + */ + return buf.push(' ', node.name, '="', node.value.replace(/[<&"]/g,_xmlEncoder), '"'); + case TEXT_NODE: + /** + * The ampersand character (&) and the left angle bracket (<) must not appear in their literal form, + * except when used as markup delimiters, or within a comment, a processing instruction, or a CDATA section. + * If they are needed elsewhere, they must be escaped using either numeric character references or the strings + * `&` and `<` respectively. + * The right angle bracket (>) may be represented using the string " > ", and must, for compatibility, + * be escaped using either `>` or a character reference when it appears in the string `]]>` in content, + * when that string is not marking the end of a CDATA section. + * + * In the content of elements, character data is any string of characters + * which does not contain the start-delimiter of any markup + * and does not include the CDATA-section-close delimiter, `]]>`. + * + * @see https://www.w3.org/TR/xml/#NT-CharData + */ + return buf.push(node.data + .replace(/[<&]/g,_xmlEncoder) + .replace(/]]>/g, ']]>') + ); + case CDATA_SECTION_NODE: + return buf.push( ''); + case COMMENT_NODE: + return buf.push( ""); + case DOCUMENT_TYPE_NODE: + var pubid = node.publicId; + var sysid = node.systemId; + buf.push(''); + }else if(sysid && sysid!='.'){ + buf.push(' SYSTEM ', sysid, '>'); + }else{ + var sub = node.internalSubset; + if(sub){ + buf.push(" [",sub,"]"); + } + buf.push(">"); + } + return; + case PROCESSING_INSTRUCTION_NODE: + return buf.push( ""); + case ENTITY_REFERENCE_NODE: + return buf.push( '&',node.nodeName,';'); + //case ENTITY_NODE: + //case NOTATION_NODE: + default: + buf.push('??',node.nodeName); + } +} +function importNode(doc,node,deep){ + var node2; + switch (node.nodeType) { + case ELEMENT_NODE: + node2 = node.cloneNode(false); + node2.ownerDocument = doc; + //var attrs = node2.attributes; + //var len = attrs.length; + //for(var i=0;i', + amp: '&', + quot: '"', + apos: "'", + Agrave: "À", + Aacute: "Á", + Acirc: "Â", + Atilde: "Ã", + Auml: "Ä", + Aring: "Å", + AElig: "Æ", + Ccedil: "Ç", + Egrave: "È", + Eacute: "É", + Ecirc: "Ê", + Euml: "Ë", + Igrave: "Ì", + Iacute: "Í", + Icirc: "Î", + Iuml: "Ï", + ETH: "Ð", + Ntilde: "Ñ", + Ograve: "Ò", + Oacute: "Ó", + Ocirc: "Ô", + Otilde: "Õ", + Ouml: "Ö", + Oslash: "Ø", + Ugrave: "Ù", + Uacute: "Ú", + Ucirc: "Û", + Uuml: "Ü", + Yacute: "Ý", + THORN: "Þ", + szlig: "ß", + agrave: "à", + aacute: "á", + acirc: "â", + atilde: "ã", + auml: "ä", + aring: "å", + aelig: "æ", + ccedil: "ç", + egrave: "è", + eacute: "é", + ecirc: "ê", + euml: "ë", + igrave: "ì", + iacute: "í", + icirc: "î", + iuml: "ï", + eth: "ð", + ntilde: "ñ", + ograve: "ò", + oacute: "ó", + ocirc: "ô", + otilde: "õ", + ouml: "ö", + oslash: "ø", + ugrave: "ù", + uacute: "ú", + ucirc: "û", + uuml: "ü", + yacute: "ý", + thorn: "þ", + yuml: "ÿ", + nbsp: "\u00a0", + iexcl: "¡", + cent: "¢", + pound: "£", + curren: "¤", + yen: "¥", + brvbar: "¦", + sect: "§", + uml: "¨", + copy: "©", + ordf: "ª", + laquo: "«", + not: "¬", + shy: "­­", + reg: "®", + macr: "¯", + deg: "°", + plusmn: "±", + sup2: "²", + sup3: "³", + acute: "´", + micro: "µ", + para: "¶", + middot: "·", + cedil: "¸", + sup1: "¹", + ordm: "º", + raquo: "»", + frac14: "¼", + frac12: "½", + frac34: "¾", + iquest: "¿", + times: "×", + divide: "÷", + forall: "∀", + part: "∂", + exist: "∃", + empty: "∅", + nabla: "∇", + isin: "∈", + notin: "∉", + ni: "∋", + prod: "∏", + sum: "∑", + minus: "−", + lowast: "∗", + radic: "√", + prop: "∝", + infin: "∞", + ang: "∠", + and: "∧", + or: "∨", + cap: "∩", + cup: "∪", + 'int': "∫", + there4: "∴", + sim: "∼", + cong: "≅", + asymp: "≈", + ne: "≠", + equiv: "≡", + le: "≤", + ge: "≥", + sub: "⊂", + sup: "⊃", + nsub: "⊄", + sube: "⊆", + supe: "⊇", + oplus: "⊕", + otimes: "⊗", + perp: "⊥", + sdot: "⋅", + Alpha: "Α", + Beta: "Β", + Gamma: "Γ", + Delta: "Δ", + Epsilon: "Ε", + Zeta: "Ζ", + Eta: "Η", + Theta: "Θ", + Iota: "Ι", + Kappa: "Κ", + Lambda: "Λ", + Mu: "Μ", + Nu: "Ν", + Xi: "Ξ", + Omicron: "Ο", + Pi: "Π", + Rho: "Ρ", + Sigma: "Σ", + Tau: "Τ", + Upsilon: "Υ", + Phi: "Φ", + Chi: "Χ", + Psi: "Ψ", + Omega: "Ω", + alpha: "α", + beta: "β", + gamma: "γ", + delta: "δ", + epsilon: "ε", + zeta: "ζ", + eta: "η", + theta: "θ", + iota: "ι", + kappa: "κ", + lambda: "λ", + mu: "μ", + nu: "ν", + xi: "ξ", + omicron: "ο", + pi: "π", + rho: "ρ", + sigmaf: "ς", + sigma: "σ", + tau: "τ", + upsilon: "υ", + phi: "φ", + chi: "χ", + psi: "ψ", + omega: "ω", + thetasym: "ϑ", + upsih: "ϒ", + piv: "ϖ", + OElig: "Œ", + oelig: "œ", + Scaron: "Š", + scaron: "š", + Yuml: "Ÿ", + fnof: "ƒ", + circ: "ˆ", + tilde: "˜", + ensp: " ", + emsp: " ", + thinsp: " ", + zwnj: "‌", + zwj: "‍", + lrm: "‎", + rlm: "‏", + ndash: "–", + mdash: "—", + lsquo: "‘", + rsquo: "’", + sbquo: "‚", + ldquo: "“", + rdquo: "”", + bdquo: "„", + dagger: "†", + Dagger: "‡", + bull: "•", + hellip: "…", + permil: "‰", + prime: "′", + Prime: "″", + lsaquo: "‹", + rsaquo: "›", + oline: "‾", + euro: "€", + trade: "™", + larr: "←", + uarr: "↑", + rarr: "→", + darr: "↓", + harr: "↔", + crarr: "↵", + lceil: "⌈", + rceil: "⌉", + lfloor: "⌊", + rfloor: "⌋", + loz: "◊", + spades: "♠", + clubs: "♣", + hearts: "♥", + diams: "♦" +}; diff --git a/lib/xmldom/sax.js b/lib/xmldom/sax.js new file mode 100644 index 0000000..ad9f184 --- /dev/null +++ b/lib/xmldom/sax.js @@ -0,0 +1,642 @@ +//[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] +//[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] +//[5] Name ::= NameStartChar (NameChar)* +var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF +var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]"); +var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$'); +//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/ +//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',') + +//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE +//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE +var S_TAG = 0;//tag name offerring +var S_ATTR = 1;//attr name offerring +var S_ATTR_SPACE=2;//attr name end and space offer +var S_EQ = 3;//=space? +var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only) +var S_ATTR_END = 5;//attr value end and no space(quot end) +var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer) +var S_TAG_CLOSE = 7;//closed el + +/** + * Creates an error that will not be caught by XMLReader aka the SAX parser. + * + * @param {string} message + * @param {any?} locator Optional, can provide details about the location in the source + * @constructor + */ +function ParseError(message, locator) { + this.message = message + this.locator = locator + if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError); +} +ParseError.prototype = new Error(); +ParseError.prototype.name = ParseError.name + +function XMLReader(){ + +} + +XMLReader.prototype = { + parse:function(source,defaultNSMap,entityMap){ + var domBuilder = this.domBuilder; + domBuilder.startDocument(); + _copy(defaultNSMap ,defaultNSMap = {}) + parse(source,defaultNSMap,entityMap, + domBuilder,this.errorHandler); + domBuilder.endDocument(); + } +} +function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){ + function fixedFromCharCode(code) { + // String.prototype.fromCharCode does not supports + // > 2 bytes unicode chars directly + if (code > 0xffff) { + code -= 0x10000; + var surrogate1 = 0xd800 + (code >> 10) + , surrogate2 = 0xdc00 + (code & 0x3ff); + + return String.fromCharCode(surrogate1, surrogate2); + } else { + return String.fromCharCode(code); + } + } + function entityReplacer(a){ + var k = a.slice(1,-1); + if(k in entityMap){ + return entityMap[k]; + }else if(k.charAt(0) === '#'){ + return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x'))) + }else{ + errorHandler.error('entity not found:'+a); + return a; + } + } + function appendText(end){//has some bugs + if(end>start){ + var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer); + locator&&position(start); + domBuilder.characters(xt,0,end-start); + start = end + } + } + function position(p,m){ + while(p>=lineEnd && (m = linePattern.exec(source))){ + lineStart = m.index; + lineEnd = lineStart + m[0].length; + locator.lineNumber++; + //console.log('line++:',locator,startPos,endPos) + } + locator.columnNumber = p-lineStart+1; + } + var lineStart = 0; + var lineEnd = 0; + var linePattern = /.*(?:\r\n?|\n)|.*$/g + var locator = domBuilder.locator; + + var parseStack = [{currentNSMap:defaultNSMapCopy}] + var closeMap = {}; + var start = 0; + while(true){ + try{ + var tagStart = source.indexOf('<',start); + if(tagStart<0){ + if(!source.substr(start).match(/^\s*$/)){ + var doc = domBuilder.doc; + var text = doc.createTextNode(source.substr(start)); + doc.appendChild(text); + domBuilder.currentElement = text; + } + return; + } + if(tagStart>start){ + appendText(tagStart); + } + switch(source.charAt(tagStart+1)){ + case '/': + var end = source.indexOf('>',tagStart+3); + var tagName = source.substring(tagStart+2,end); + var config = parseStack.pop(); + if(end<0){ + + tagName = source.substring(tagStart+2).replace(/[\s<].*/,''); + errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName); + end = tagStart+1+tagName.length; + }else if(tagName.match(/\s + locator&&position(tagStart); + end = parseInstruction(source,tagStart,domBuilder); + break; + case '!':// start){ + start = end; + }else{ + //TODO: 这里有可能sax回退,有位置错误风险 + appendText(Math.max(tagStart,start)+1); + } + } +} +function copyLocator(f,t){ + t.lineNumber = f.lineNumber; + t.columnNumber = f.columnNumber; + return t; +} + +/** + * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack); + * @return end of the elementStartPart(end of elementEndPart for selfClosed el) + */ +function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){ + + /** + * @param {string} qname + * @param {string} value + * @param {number} startIndex + */ + function addAttribute(qname, value, startIndex) { + if (qname in el.attributeNames) errorHandler.fatalError('Attribute ' + qname + ' redefined') + el.addValue(qname, value, startIndex) + } + var attrName; + var value; + var p = ++start; + var s = S_TAG;//status + while(true){ + var c = source.charAt(p); + switch(c){ + case '=': + if(s === S_ATTR){//attrName + attrName = source.slice(start,p); + s = S_EQ; + }else if(s === S_ATTR_SPACE){ + s = S_EQ; + }else{ + //fatalError: equal must after attrName or space after attrName + throw new Error('attribute equal must after attrName'); // No known test case + } + break; + case '\'': + case '"': + if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE + ){//equal + if(s === S_ATTR){ + errorHandler.warning('attribute value must after "="') + attrName = source.slice(start,p) + } + start = p+1; + p = source.indexOf(c,start) + if(p>0){ + value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); + addAttribute(attrName, value, start-1); + s = S_ATTR_END; + }else{ + //fatalError: no end quot match + throw new Error('attribute value no end \''+c+'\' match'); + } + }else if(s == S_ATTR_NOQUOT_VALUE){ + value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); + //console.log(attrName,value,start,p) + addAttribute(attrName, value, start); + //console.dir(el) + errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!'); + start = p+1; + s = S_ATTR_END + }else{ + //fatalError: no equal before + throw new Error('attribute value must after "="'); // No known test case + } + break; + case '/': + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p)); + case S_ATTR_END: + case S_TAG_SPACE: + case S_TAG_CLOSE: + s =S_TAG_CLOSE; + el.closed = true; + case S_ATTR_NOQUOT_VALUE: + case S_ATTR: + case S_ATTR_SPACE: + break; + //case S_EQ: + default: + throw new Error("attribute invalid close char('/')") // No known test case + } + break; + case ''://end document + errorHandler.error('unexpected end of input'); + if(s == S_TAG){ + el.setTagName(source.slice(start,p)); + } + return p; + case '>': + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p)); + case S_ATTR_END: + case S_TAG_SPACE: + case S_TAG_CLOSE: + break;//normal + case S_ATTR_NOQUOT_VALUE://Compatible state + case S_ATTR: + value = source.slice(start,p); + if(value.slice(-1) === '/'){ + el.closed = true; + value = value.slice(0,-1) + } + case S_ATTR_SPACE: + if(s === S_ATTR_SPACE){ + value = attrName; + } + if(s == S_ATTR_NOQUOT_VALUE){ + errorHandler.warning('attribute "'+value+'" missed quot(")!'); + addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start) + }else{ + if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){ + errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!') + } + addAttribute(value, value, start) + } + break; + case S_EQ: + throw new Error('attribute value missed!!'); + } +// console.log(tagName,tagNamePattern,tagNamePattern.test(tagName)) + return p; + /*xml space '\x20' | #x9 | #xD | #xA; */ + case '\u0080': + c = ' '; + default: + if(c<= ' '){//space + switch(s){ + case S_TAG: + el.setTagName(source.slice(start,p));//tagName + s = S_TAG_SPACE; + break; + case S_ATTR: + attrName = source.slice(start,p) + s = S_ATTR_SPACE; + break; + case S_ATTR_NOQUOT_VALUE: + var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer); + errorHandler.warning('attribute "'+value+'" missed quot(")!!'); + addAttribute(attrName, value, start) + case S_ATTR_END: + s = S_TAG_SPACE; + break; + //case S_TAG_SPACE: + //case S_EQ: + //case S_ATTR_SPACE: + // void();break; + //case S_TAG_CLOSE: + //ignore warning + } + }else{//not space +//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE +//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE + switch(s){ + //case S_TAG:void();break; + //case S_ATTR:void();break; + //case S_ATTR_NOQUOT_VALUE:void();break; + case S_ATTR_SPACE: + var tagName = el.tagName; + if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){ + errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!') + } + addAttribute(attrName, attrName, start); + start = p; + s = S_ATTR; + break; + case S_ATTR_END: + errorHandler.warning('attribute space is required"'+attrName+'"!!') + case S_TAG_SPACE: + s = S_ATTR; + start = p; + break; + case S_EQ: + s = S_ATTR_NOQUOT_VALUE; + start = p; + break; + case S_TAG_CLOSE: + throw new Error("elements closed character '/' and '>' must be connected to"); + } + } + }//end outer switch + //console.log('p++',p) + p++; + } +} +/** + * @return true if has new namespace define + */ +function appendElement(el,domBuilder,currentNSMap){ + var tagName = el.tagName; + var localNSMap = null; + //var currentNSMap = parseStack[parseStack.length-1].currentNSMap; + var i = el.length; + while(i--){ + var a = el[i]; + var qName = a.qName; + var value = a.value; + var nsp = qName.indexOf(':'); + if(nsp>0){ + var prefix = a.prefix = qName.slice(0,nsp); + var localName = qName.slice(nsp+1); + var nsPrefix = prefix === 'xmlns' && localName + }else{ + localName = qName; + prefix = null + nsPrefix = qName === 'xmlns' && '' + } + //can not set prefix,because prefix !== '' + a.localName = localName ; + //prefix == null for no ns prefix attribute + if(nsPrefix !== false){//hack!! + if(localNSMap == null){ + localNSMap = {} + //console.log(currentNSMap,0) + _copy(currentNSMap,currentNSMap={}) + //console.log(currentNSMap,1) + } + currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value; + a.uri = 'http://www.w3.org/2000/xmlns/' + domBuilder.startPrefixMapping(nsPrefix, value) + } + } + var i = el.length; + while(i--){ + a = el[i]; + var prefix = a.prefix; + if(prefix){//no prefix attribute has no namespace + if(prefix === 'xml'){ + a.uri = 'http://www.w3.org/XML/1998/namespace'; + }if(prefix !== 'xmlns'){ + a.uri = currentNSMap[prefix || ''] + + //{console.log('###'+a.qName,domBuilder.locator.systemId+'',currentNSMap,a.uri)} + } + } + } + var nsp = tagName.indexOf(':'); + if(nsp>0){ + prefix = el.prefix = tagName.slice(0,nsp); + localName = el.localName = tagName.slice(nsp+1); + }else{ + prefix = null;//important!! + localName = el.localName = tagName; + } + //no prefix element has default namespace + var ns = el.uri = currentNSMap[prefix || '']; + domBuilder.startElement(ns,localName,tagName,el); + //endPrefixMapping and startPrefixMapping have not any help for dom builder + //localNSMap = null + if(el.closed){ + domBuilder.endElement(ns,localName,tagName); + if(localNSMap){ + for(prefix in localNSMap){ + domBuilder.endPrefixMapping(prefix) + } + } + }else{ + el.currentNSMap = currentNSMap; + el.localNSMap = localNSMap; + //parseStack.push(el); + return true; + } +} +function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){ + if(/^(?:script|textarea)$/i.test(tagName)){ + var elEndStart = source.indexOf('',elStartEnd); + var text = source.substring(elStartEnd+1,elEndStart); + if(/[&<]/.test(text)){ + if(/^script$/i.test(tagName)){ + //if(!/\]\]>/.test(text)){ + //lexHandler.startCDATA(); + domBuilder.characters(text,0,text.length); + //lexHandler.endCDATA(); + return elEndStart; + //} + }//}else{//text area + text = text.replace(/&#?\w+;/g,entityReplacer); + domBuilder.characters(text,0,text.length); + return elEndStart; + //} + + } + } + return elStartEnd+1; +} +function fixSelfClosed(source,elStartEnd,tagName,closeMap){ + //if(tagName in closeMap){ + var pos = closeMap[tagName]; + if(pos == null){ + //console.log(tagName) + pos = source.lastIndexOf('') + if(pos',start+4); + //append comment source.substring(4,end)//