1   
2   
3   
4   
5   
6   
7   
8   package org.dom4j.io;
9   
10  import java.io.BufferedReader;
11  import java.io.File;
12  import java.io.FileReader;
13  import java.io.IOException;
14  import java.io.InputStream;
15  import java.io.InputStreamReader;
16  import java.io.Reader;
17  import java.net.URL;
18  
19  import org.dom4j.Document;
20  import org.dom4j.DocumentException;
21  import org.dom4j.DocumentFactory;
22  import org.dom4j.Element;
23  import org.dom4j.ElementHandler;
24  import org.dom4j.xpp.ProxyXmlStartTag;
25  
26  import org.gjt.xpp.XmlEndTag;
27  import org.gjt.xpp.XmlPullParser;
28  import org.gjt.xpp.XmlPullParserException;
29  import org.gjt.xpp.XmlPullParserFactory;
30  
31  /***
32   * <p>
33   * <code>XPPReader</code> is a Reader of DOM4J documents that uses the fast <a
34   * href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 2.x </a>. It
35   * does not currently support comments, CDATA or ProcessingInstructions or
36   * validation but it is very fast for use in SOAP style environments.
37   * </p>
38   * 
39   * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40   * @version $Revision: 1.7 $
41   */
42  public class XPPReader {
43      /*** <code>DocumentFactory</code> used to create new document objects */
44      private DocumentFactory factory;
45  
46      /*** <code>XmlPullParser</code> used to parse XML */
47      private XmlPullParser xppParser;
48  
49      /*** <code>XmlPullParser</code> used to parse XML */
50      private XmlPullParserFactory xppFactory;
51  
52      /*** DispatchHandler to call when each <code>Element</code> is encountered */
53      private DispatchHandler dispatchHandler;
54  
55      public XPPReader() {
56      }
57  
58      public XPPReader(DocumentFactory factory) {
59          this.factory = factory;
60      }
61  
62      /***
63       * <p>
64       * Reads a Document from the given <code>File</code>
65       * </p>
66       * 
67       * @param file
68       *            is the <code>File</code> to read from.
69       * 
70       * @return the newly created Document instance
71       * 
72       * @throws DocumentException
73       *             if an error occurs during parsing.
74       * @throws IOException
75       *             if a URL could not be made for the given File
76       * @throws XmlPullParserException
77       *             DOCUMENT ME!
78       */
79      public Document read(File file) throws DocumentException, IOException,
80              XmlPullParserException {
81          String systemID = file.getAbsolutePath();
82  
83          return read(new BufferedReader(new FileReader(file)), systemID);
84      }
85  
86      /***
87       * <p>
88       * Reads a Document from the given <code>URL</code>
89       * </p>
90       * 
91       * @param url
92       *            <code>URL</code> to read from.
93       * 
94       * @return the newly created Document instance
95       * 
96       * @throws DocumentException
97       *             if an error occurs during parsing.
98       * @throws IOException
99       *             DOCUMENT ME!
100      * @throws XmlPullParserException
101      *             DOCUMENT ME!
102      */
103     public Document read(URL url) throws DocumentException, IOException,
104             XmlPullParserException {
105         String systemID = url.toExternalForm();
106 
107         return read(createReader(url.openStream()), systemID);
108     }
109 
110     /***
111      * <p>
112      * Reads a Document from the given URL or filename.
113      * </p>
114      * 
115      * <p>
116      * If the systemID contains a <code>':'</code> character then it is
117      * assumed to be a URL otherwise its assumed to be a file name. If you want
118      * finer grained control over this mechansim then please explicitly pass in
119      * either a {@link URL}or a {@link File}instance instead of a {@link
120      * String} to denote the source of the document.
121      * </p>
122      * 
123      * @param systemID
124      *            is a URL for a document or a file name.
125      * 
126      * @return the newly created Document instance
127      * 
128      * @throws DocumentException
129      *             if an error occurs during parsing.
130      * @throws IOException
131      *             if a URL could not be made for the given File
132      * @throws XmlPullParserException
133      *             DOCUMENT ME!
134      */
135     public Document read(String systemID) throws DocumentException,
136             IOException, XmlPullParserException {
137         if (systemID.indexOf(':') >= 0) {
138             
139             return read(new URL(systemID));
140         } else {
141             
142             return read(new File(systemID));
143         }
144     }
145 
146     /***
147      * <p>
148      * Reads a Document from the given stream
149      * </p>
150      * 
151      * @param in
152      *            <code>InputStream</code> to read from.
153      * 
154      * @return the newly created Document instance
155      * 
156      * @throws DocumentException
157      *             if an error occurs during parsing.
158      * @throws IOException
159      *             DOCUMENT ME!
160      * @throws XmlPullParserException
161      *             DOCUMENT ME!
162      */
163     public Document read(InputStream in) throws DocumentException, IOException,
164             XmlPullParserException {
165         return read(createReader(in));
166     }
167 
168     /***
169      * <p>
170      * Reads a Document from the given <code>Reader</code>
171      * </p>
172      * 
173      * @param reader
174      *            is the reader for the input
175      * 
176      * @return the newly created Document instance
177      * 
178      * @throws DocumentException
179      *             if an error occurs during parsing.
180      * @throws IOException
181      *             DOCUMENT ME!
182      * @throws XmlPullParserException
183      *             DOCUMENT ME!
184      */
185     public Document read(Reader reader) throws DocumentException, IOException,
186             XmlPullParserException {
187         getXPPParser().setInput(reader);
188 
189         return parseDocument();
190     }
191 
192     /***
193      * <p>
194      * Reads a Document from the given array of characters
195      * </p>
196      * 
197      * @param text
198      *            is the text to parse
199      * 
200      * @return the newly created Document instance
201      * 
202      * @throws DocumentException
203      *             if an error occurs during parsing.
204      * @throws IOException
205      *             DOCUMENT ME!
206      * @throws XmlPullParserException
207      *             DOCUMENT ME!
208      */
209     public Document read(char[] text) throws DocumentException, IOException,
210             XmlPullParserException {
211         getXPPParser().setInput(text);
212 
213         return parseDocument();
214     }
215 
216     /***
217      * <p>
218      * Reads a Document from the given stream
219      * </p>
220      * 
221      * @param in
222      *            <code>InputStream</code> to read from.
223      * @param systemID
224      *            is the URI for the input
225      * 
226      * @return the newly created Document instance
227      * 
228      * @throws DocumentException
229      *             if an error occurs during parsing.
230      * @throws IOException
231      *             DOCUMENT ME!
232      * @throws XmlPullParserException
233      *             DOCUMENT ME!
234      */
235     public Document read(InputStream in, String systemID)
236             throws DocumentException, IOException, XmlPullParserException {
237         return read(createReader(in), systemID);
238     }
239 
240     /***
241      * <p>
242      * Reads a Document from the given <code>Reader</code>
243      * </p>
244      * 
245      * @param reader
246      *            is the reader for the input
247      * @param systemID
248      *            is the URI for the input
249      * 
250      * @return the newly created Document instance
251      * 
252      * @throws DocumentException
253      *             if an error occurs during parsing.
254      * @throws IOException
255      *             DOCUMENT ME!
256      * @throws XmlPullParserException
257      *             DOCUMENT ME!
258      */
259     public Document read(Reader reader, String systemID)
260             throws DocumentException, IOException, XmlPullParserException {
261         Document document = read(reader);
262         document.setName(systemID);
263 
264         return document;
265     }
266 
267     
268     
269     public XmlPullParser getXPPParser() throws XmlPullParserException {
270         if (xppParser == null) {
271             xppParser = getXPPFactory().newPullParser();
272         }
273 
274         return xppParser;
275     }
276 
277     public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278         if (xppFactory == null) {
279             xppFactory = XmlPullParserFactory.newInstance();
280         }
281 
282         return xppFactory;
283     }
284 
285     public void setXPPFactory(XmlPullParserFactory xPPFactory) {
286         this.xppFactory = xPPFactory;
287     }
288 
289     /***
290      * DOCUMENT ME!
291      * 
292      * @return the <code>DocumentFactory</code> used to create document
293      *         objects
294      */
295     public DocumentFactory getDocumentFactory() {
296         if (factory == null) {
297             factory = DocumentFactory.getInstance();
298         }
299 
300         return factory;
301     }
302 
303     /***
304      * <p>
305      * This sets the <code>DocumentFactory</code> used to create new
306      * documents. This method allows the building of custom DOM4J tree objects
307      * to be implemented easily using a custom derivation of
308      * {@link DocumentFactory}
309      * </p>
310      * 
311      * @param documentFactory
312      *            <code>DocumentFactory</code> used to create DOM4J objects
313      */
314     public void setDocumentFactory(DocumentFactory documentFactory) {
315         this.factory = documentFactory;
316     }
317 
318     /***
319      * Adds the <code>ElementHandler</code> to be called when the specified
320      * path is encounted.
321      * 
322      * @param path
323      *            is the path to be handled
324      * @param handler
325      *            is the <code>ElementHandler</code> to be called by the event
326      *            based processor.
327      */
328     public void addHandler(String path, ElementHandler handler) {
329         getDispatchHandler().addHandler(path, handler);
330     }
331 
332     /***
333      * Removes the <code>ElementHandler</code> from the event based processor,
334      * for the specified path.
335      * 
336      * @param path
337      *            is the path to remove the <code>ElementHandler</code> for.
338      */
339     public void removeHandler(String path) {
340         getDispatchHandler().removeHandler(path);
341     }
342 
343     /***
344      * When multiple <code>ElementHandler</code> instances have been
345      * registered, this will set a default <code>ElementHandler</code> to be
346      * called for any path which does <b>NOT </b> have a handler registered.
347      * 
348      * @param handler
349      *            is the <code>ElementHandler</code> to be called by the event
350      *            based processor.
351      */
352     public void setDefaultHandler(ElementHandler handler) {
353         getDispatchHandler().setDefaultHandler(handler);
354     }
355 
356     
357     
358     protected Document parseDocument() throws DocumentException, IOException,
359             XmlPullParserException {
360         Document document = getDocumentFactory().createDocument();
361         Element parent = null;
362         XmlPullParser parser = getXPPParser();
363         parser.setNamespaceAware(true);
364 
365         ProxyXmlStartTag startTag = new ProxyXmlStartTag();
366         XmlEndTag endTag = xppFactory.newEndTag();
367 
368         while (true) {
369             int type = parser.next();
370 
371             switch (type) {
372                 case XmlPullParser.END_DOCUMENT:
373                     return document;
374 
375                 case XmlPullParser.START_TAG: {
376                     parser.readStartTag(startTag);
377 
378                     Element newElement = startTag.getElement();
379 
380                     if (parent != null) {
381                         parent.add(newElement);
382                     } else {
383                         document.add(newElement);
384                     }
385 
386                     parent = newElement;
387 
388                     break;
389                 }
390 
391                 case XmlPullParser.END_TAG: {
392                     parser.readEndTag(endTag);
393 
394                     if (parent != null) {
395                         parent = parent.getParent();
396                     }
397 
398                     break;
399                 }
400 
401                 case XmlPullParser.CONTENT: {
402                     String text = parser.readContent();
403 
404                     if (parent != null) {
405                         parent.addText(text);
406                     } else {
407                         String msg = "Cannot have text content outside of the "
408                                 + "root document";
409                         throw new DocumentException(msg);
410                     }
411 
412                     break;
413                 }
414 
415                 default:
416                     throw new DocumentException("Error: unknown type: " + type);
417             }
418         }
419     }
420 
421     protected DispatchHandler getDispatchHandler() {
422         if (dispatchHandler == null) {
423             dispatchHandler = new DispatchHandler();
424         }
425 
426         return dispatchHandler;
427     }
428 
429     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
430         this.dispatchHandler = dispatchHandler;
431     }
432 
433     /***
434      * Factory method to create a Reader from the given InputStream.
435      * 
436      * @param in
437      *            DOCUMENT ME!
438      * 
439      * @return DOCUMENT ME!
440      * 
441      * @throws IOException
442      *             DOCUMENT ME!
443      */
444     protected Reader createReader(InputStream in) throws IOException {
445         return new BufferedReader(new InputStreamReader(in));
446     }
447 }
448 
449 
450 
451 
452 
453 
454 
455 
456 
457 
458 
459 
460 
461 
462 
463 
464 
465 
466 
467 
468 
469 
470 
471 
472 
473 
474 
475 
476 
477 
478 
479 
480 
481 
482 
483 
484