001    /*
002     $Id: XmlParser.java 4132 2006-10-18 08:24:58Z paulk $
003    
004     Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved.
005    
006     Redistribution and use of this software and associated documentation
007     ("Software"), with or without modification, are permitted provided
008     that the following conditions are met:
009    
010     1. Redistributions of source code must retain copyright
011        statements and notices.  Redistributions must also contain a
012        copy of this document.
013    
014     2. Redistributions in binary form must reproduce the
015        above copyright notice, this list of conditions and the
016        following disclaimer in the documentation and/or other
017        materials provided with the distribution.
018    
019     3. The name "groovy" must not be used to endorse or promote
020        products derived from this Software without prior written
021        permission of The Codehaus.  For written permission,
022        please contact info@codehaus.org.
023    
024     4. Products derived from this Software may not be called "groovy"
025        nor may "groovy" appear in their names without prior written
026        permission of The Codehaus. "groovy" is a registered
027        trademark of The Codehaus.
028    
029     5. Due credit should be given to The Codehaus -
030        http://groovy.codehaus.org/
031    
032     THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS
033     ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
034     NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
035     FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
036     THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
037     INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
038     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
039     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
040     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
041     STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
042     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
043     OF THE POSSIBILITY OF SUCH DAMAGE.
044    
045     */
046    package groovy.util;
047    
048    import groovy.xml.QName;
049    import groovy.xml.FactorySupport;
050    
051    import java.io.File;
052    import java.io.FileInputStream;
053    import java.io.IOException;
054    import java.io.InputStream;
055    import java.io.Reader;
056    import java.io.StringReader;
057    import java.util.ArrayList;
058    import java.util.HashMap;
059    import java.util.List;
060    import java.util.Map;
061    
062    import javax.xml.parsers.ParserConfigurationException;
063    import javax.xml.parsers.SAXParser;
064    import javax.xml.parsers.SAXParserFactory;
065    
066    import org.xml.sax.*;
067    
068    /**
069     * A helper class for parsing XML into a tree of Node instances for 
070     * a simple way of processing XML. This parser does not preserve the
071     * XML InfoSet - if that's what you need try using W3C DOM, dom4j, JDOM, XOM etc.
072     * This parser ignores comments and processing instructions and converts the
073     * XML into a Node for each element in the XML with attributes
074     * and child Nodes and Strings. This simple model is sufficient for
075     * most simple use cases of processing XML.
076     * 
077     * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
078     * @version $Revision: 4132 $
079     */
080    public class XmlParser implements ContentHandler {
081    
082        private StringBuffer bodyText = new StringBuffer();
083        private List stack = new ArrayList();
084        private Locator locator;
085        private XMLReader reader;
086        private Node parent;
087        private boolean trimWhitespace = true;
088    
089        public XmlParser() throws ParserConfigurationException, SAXException {
090            this(false, true);
091        }
092    
093        public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
094            SAXParserFactory factory = FactorySupport.createSaxParserFactory();
095            factory.setNamespaceAware(namespaceAware);
096            factory.setValidating(validating);
097            reader = factory.newSAXParser().getXMLReader();
098        }
099    
100        public XmlParser(XMLReader reader) {
101            this.reader = reader;
102        }
103    
104        public XmlParser(SAXParser parser) throws SAXException {
105            reader = parser.getXMLReader();
106        }
107    
108    
109        /**
110         * Parses the content of the given file as XML turning it into a tree
111         * of Nodes
112         */
113        public Node parse(File file) throws IOException, SAXException {
114            InputSource input = new InputSource(new FileInputStream(file));
115            input.setSystemId("file://" + file.getAbsolutePath());
116            getXMLReader().parse(input);
117            return parent;
118    
119        }
120    
121        /**
122         * Parse the content of the specified input source into a tree of Nodes.
123         */
124        public Node parse(InputSource input) throws IOException, SAXException {
125            getXMLReader().parse(input);
126            return parent;
127        }
128    
129        /**
130         * Parse the content of the specified input stream into a tree of Nodes.
131         * Note that using this method will not provide the parser with any URI
132         * for which to find DTDs etc
133         */
134        public Node parse(InputStream input) throws IOException, SAXException {
135            InputSource is = new InputSource(input);
136            getXMLReader().parse(is);
137            return parent;
138        }
139    
140        /**
141         * Parse the content of the specified reader into a tree of Nodes.
142         * Note that using this method will not provide the parser with any URI
143         * for which to find DTDs etc
144         */
145        public Node parse(Reader in) throws IOException, SAXException {
146            InputSource is = new InputSource(in);
147            getXMLReader().parse(is);
148            return parent;
149        }
150    
151        /**
152         * Parse the content of the specified URI into a tree of Nodes
153         */
154        public Node parse(String uri) throws IOException, SAXException {
155            InputSource is = new InputSource(uri);
156            getXMLReader().parse(is);
157            return parent;
158        }
159    
160        /**
161         * A helper method to parse the given text as XML
162         * 
163         * @param text
164         */
165        public Node parseText(String text) throws IOException, SAXException {
166            return parse(new StringReader(text));
167        }
168        // Delegated XMLReader methods
169        //------------------------------------------------------------------------
170    
171        /* (non-Javadoc)
172         * @see org.xml.sax.XMLReader#getDTDHandler()
173         */
174        public DTDHandler getDTDHandler() {
175            return this.reader.getDTDHandler();
176        }
177    
178        /* (non-Javadoc)
179         * @see org.xml.sax.XMLReader#getEntityResolver()
180         */
181        public EntityResolver getEntityResolver() {
182            return this.reader.getEntityResolver();
183        }
184    
185        /* (non-Javadoc)
186         * @see org.xml.sax.XMLReader#getErrorHandler()
187         */
188        public ErrorHandler getErrorHandler() {
189            return this.reader.getErrorHandler();
190        }
191    
192        /* (non-Javadoc)
193         * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
194         */
195        public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
196            return this.reader.getFeature(uri);
197        }
198    
199        /* (non-Javadoc)
200         * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
201         */
202        public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException {
203            return this.reader.getProperty(uri);
204        }
205    
206        /* (non-Javadoc)
207         * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
208         */
209        public void setDTDHandler(final DTDHandler dtdHandler) {
210            this.reader.setDTDHandler(dtdHandler);
211        }
212    
213        /* (non-Javadoc)
214         * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
215         */
216        public void setEntityResolver(final EntityResolver entityResolver) {
217            this.reader.setEntityResolver(entityResolver);
218        }
219    
220        /* (non-Javadoc)
221         * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
222         */
223        public void setErrorHandler(final ErrorHandler errorHandler) {
224            this.reader.setErrorHandler(errorHandler);
225        }
226    
227        /* (non-Javadoc)
228         * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
229         */
230        public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException {
231            this.reader.setFeature(uri, value);
232        }
233    
234        /* (non-Javadoc)
235         * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
236         */
237        public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException {
238             this.reader.setProperty(uri, value);
239        }
240    
241        // ContentHandler interface
242        //-------------------------------------------------------------------------                    
243        public void startDocument() throws SAXException {
244            parent = null;
245        }
246    
247        public void endDocument() throws SAXException {
248            stack.clear();
249        }
250    
251        public void startElement(String namespaceURI, String localName, String qName, Attributes list)
252            throws SAXException {
253            addTextToNode();
254    
255            Object name = getElementName(namespaceURI, localName, qName);
256    
257            int size = list.getLength();
258            Map attributes = new HashMap(size);
259            for (int i = 0; i < size; i++) {
260                Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
261                String value = list.getValue(i);
262                attributes.put(attributeName, value);
263            }
264            parent = new Node(parent, name, attributes, new ArrayList());
265            stack.add(parent);
266        }
267    
268        public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
269            addTextToNode();
270    
271            if (!stack.isEmpty()) {
272                stack.remove(stack.size() - 1);
273                if (!stack.isEmpty()) {
274                    parent = (Node) stack.get(stack.size() - 1);
275                }
276            }
277        }
278    
279        public void characters(char buffer[], int start, int length) throws SAXException {
280            bodyText.append(buffer, start, length);
281        }
282    
283        public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
284        }
285    
286        public void endPrefixMapping(String prefix) throws SAXException {
287        }
288    
289        public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
290        }
291    
292        public void processingInstruction(String target, String data) throws SAXException {
293        }
294    
295        public Locator getDocumentLocator() {
296            return locator;
297        }
298    
299        public void setDocumentLocator(Locator locator) {
300            this.locator = locator;
301        }
302    
303        public void skippedEntity(String name) throws SAXException {
304        }
305    
306        // Implementation methods
307        //-------------------------------------------------------------------------           
308        protected XMLReader getXMLReader() {
309            reader.setContentHandler(this);
310            return reader;
311        }
312    
313        protected void addTextToNode() {
314            String text = bodyText.toString();
315            if (trimWhitespace) {
316                text = text.trim();
317            }
318            if (text.length() > 0) {
319                parent.children().add(text);
320            }
321            bodyText = new StringBuffer();
322        }
323    
324        protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
325            String name = localName;
326            if ((name == null) || (name.length() < 1)) {
327                name = qName;
328            }
329            if (namespaceURI == null || namespaceURI.length() <= 0) {
330                return name;
331            }
332            else {
333                return new QName(namespaceURI, name, qName);
334            }
335        }
336    }