Opened 12 years ago

Closed 12 years ago

#2295 closed defect (wontfix)

CDATA in parser

Reported by: naresh.poobahg@… Owned by: dylan
Priority: low Milestone:
Component: Parser Version: 0.4.1
Keywords: parser CDATA Cc:
Blocked By: Blocking:

Description

The XML parser does not handle CDATA sections in dojo.xml.Parse.parseElement. The Dojo programmer has to encode his or her text if it contains tags. Some code was published on the Dojo Interest List on 12/19/2006 to solve this problem.

Change History (2)

comment:1 Changed 12 years ago by naresh.poobahg@…

Here is the code that I use. It is well tested (used a lot in an Ajax/Dojo web site); I hope this helps:

/**
 * Parses a DOM element, and recursively its child elements, into a plain
 * object tree keyed by tag name. Text and CDATA content is preserved as a
 * `value` property.
 *
 * @param node              DOM element to parse.
 * @param hasParentNodeSet  Passed as `true` on recursive calls; only the
 *                          commented-out legacy return below ever read it.
 * @param optimizeForDojoML When truthy, the tag name reported by
 *                          getDojoTagName() takes precedence, and attributes
 *                          are only collected if such a name was found.
 * @param thisIdx           Position of this element among its parent's element
 *                          children; stored as `index` (defaults to 0).
 * @returns `null` for IE's phantom end-tag nodes, `{}` when the node carries
 *          parseWidgets="false", `{ value: <text> }` when the node has no
 *          element children, otherwise the populated node set.
 */
this.parseElement = function(node, hasParentNodeSet, optimizeForDojoML, thisIdx){
	var parsedNodeSet = {};

	var tagName = getTagName(node);
	// There's a weird bug in IE where it counts end tags, e.g.
	// </dojo:button> as nodes that should be parsed. Ignore these
	if((tagName)&&(tagName.indexOf("/")==0)){
		return null;
	}

	// look for a dojoml qualified name
	// process dojoml only when optimizeForDojoML is true
	var process = true;
	if(optimizeForDojoML){
		var dojoTagName = getDojoTagName(node);
		tagName = dojoTagName || tagName;
		process = Boolean(dojoTagName);
	}

	// getAttribute may throw on some node types; in that case just carry on.
	try{
		var attr = node.getAttribute("parseWidgets");
		if(attr && attr.toLowerCase() == "false"){
			return {};
		}
	}catch(e){/*continue*/}

	parsedNodeSet[tagName] = [];
	var pos = tagName.indexOf(":");
	if(pos>0){
		var ns = tagName.substring(0,pos);
		parsedNodeSet["ns"] = ns;
		// honor user namespace filters
		if((dojo.ns)&&(!dojo.ns.allow(ns))){process=false;}
	}

	if(process){
		var attributeSet = this.parseAttributes(node);
		for(var attr in attributeSet){
			// NOTE(review): typeof never yields "array", so the second test is
			// always true and the slot is always (re)initialised here.
			if((!parsedNodeSet[tagName][attr])||
				(typeof parsedNodeSet[tagName][attr] != "array")){
				parsedNodeSet[tagName][attr] = [];
			}
			parsedNodeSet[tagName][attr].push(attributeSet[attr]);
		}
		// FIXME: we might want to make this optional or provide cloning
		// instead of referencing, but for now, we include a node reference
		// to allow instantiated components to figure out their "roots"
		parsedNodeSet[tagName].nodeRef = node;
		parsedNodeSet.tagName = tagName;
		parsedNodeSet.index = thisIdx || 0;
	}

	var count = 0;
	var hasNoChild = true;
	for(var i = 0; i < node.childNodes.length; i++){
		var tcn = node.childNodes.item(i);
		switch(tcn.nodeType){
			case dojo.dom.ELEMENT_NODE: // element nodes, call this function recursively
				count++;
				hasNoChild = false;
				var ctn = getDojoTagName(tcn) || getTagName(tcn);
				if(!parsedNodeSet[ctn]){
					parsedNodeSet[ctn] = [];
				}
				parsedNodeSet[ctn].push(this.parseElement(tcn, true, optimizeForDojoML, count));
				if(	(tcn.childNodes.length == 1) &&
					((tcn.childNodes.item(0).nodeType == dojo.dom.TEXT_NODE) ||
					(tcn.childNodes.item(0).nodeType == dojo.dom.CDATA_SECTION_NODE)) ){
					// simple cases (99.99% of the cases)
					parsedNodeSet[ctn][parsedNodeSet[ctn].length-1].value = tcn.childNodes.item(0).nodeValue;
				}
				break;
			default: break;
			/*
			case dojo.dom.ATTRIBUTE_NODE: // attribute node... not meaningful here
				break;
			case dojo.dom.TEXT_NODE:
			case dojo.dom.CDATA_SECTION_NODE: // if a single text node is the child, treat it as an attribute
				if(node.childNodes.length == 1){
					parsedNodeSet[tagName].push({ value: node.childNodes.item(0).nodeValue });
				}
				break;
			case dojo.dom.ENTITY_REFERENCE_NODE: // entity reference node... not meaningful here
				break;
			case dojo.dom.ENTITY_NODE: // entity node... not sure if this would ever be meaningful
				break;
			case dojo.dom.PROCESSING_INSTRUCTION_NODE: // processing instruction node... not meaningful here
				break;
			case dojo.dom.COMMENT_NODE: // comment node... not not sure if this would ever be meaningful
				break;
			case dojo.dom.DOCUMENT_NODE: // document node... not sure if this would ever be meaningful
				break;
			case dojo.dom.DOCUMENT_TYPE_NODE: // document type node... not meaningful here
				break;
			case dojo.dom.DOCUMENT_FRAGMENT_NODE: // document fragment node... not meaningful here
				break;
			case dojo.dom.NOTATION_NODE:// notation node... not meaningful here
				break;
			*/
		}
	}

	if(hasNoChild){
		// node contains only CDATA_SECTION_NODEs and/or TEXT_NODEs
		parsedNodeSet = ({ value:dojo.dom.textContent(node) });
	}
	//return (hasParentNodeSet) ? parsedNodeSet[node.tagName] : parsedNodeSet;
	//if(parsedNodeSet.tagName)dojo.debug("parseElement: RETURNING NODE WITH TAGNAME "+parsedNodeSet.tagName);
	return parsedNodeSet;
};

/**
 * Parses a set of attributes on a node into an object tree.
 *
 * @param node DOM node whose `attributes` collection is read.
 * @returns An object mapping each attribute's name (with any single
 *          "prefix:" stripped) to `{ value: <nodeValue> }`.
 */
this.parseAttributes = function(node){
	var parsedAttributeSet = {};
	var atts = node.attributes;
	// TODO: should we allow for duplicate attributes at this point...
	// would any of the relevant dom implementations even allow this?
	var attnode, i=0;
	while((attnode=atts[i++])){
		if((dojo.render.html.capable)&&(dojo.render.html.ie)){
			if(!attnode){ continue; }
			// On IE, skip attributes with no usable value. Note that && binds
			// tighter than ||, so the null and empty-string tests apply
			// regardless of the typeof-object check.
			if((typeof attnode == "object")&&
				(typeof attnode.nodeValue == 'undefined')||
				(attnode.nodeValue == null)||
				(attnode.nodeValue == '')){
				continue;
			}
		}

		// Drop a namespace prefix only when the name has exactly one colon.
		var nn = attnode.nodeName.split(":");
		nn = (nn.length == 2) ? nn[1] : attnode.nodeName;

		parsedAttributeSet[nn] = {
			value: attnode.nodeValue
		};
	}
	return parsedAttributeSet;
};

comment:2 Changed 12 years ago by bill

Resolution: wontfix
Status: new → closed

The whole XML parser is gone in 0.9.

Note: See TracTickets for help on using tickets.