1   
2   
3   
4   
5   
6   
7   
8   package org.dom4j.io;
9   
10  import java.io.BufferedReader;
11  import java.io.CharArrayReader;
12  import java.io.File;
13  import java.io.FileReader;
14  import java.io.IOException;
15  import java.io.InputStream;
16  import java.io.InputStreamReader;
17  import java.io.Reader;
18  import java.net.URL;
19  
20  import org.dom4j.Document;
21  import org.dom4j.DocumentException;
22  import org.dom4j.DocumentFactory;
23  import org.dom4j.Element;
24  import org.dom4j.ElementHandler;
25  import org.dom4j.QName;
26  
27  import org.xmlpull.v1.XmlPullParser;
28  import org.xmlpull.v1.XmlPullParserException;
29  import org.xmlpull.v1.XmlPullParserFactory;
30  
31  /***
32   * <p>
33   * <code>XPP3Reader</code> is a Reader of DOM4J documents that uses the fast
34   * <a href="http://www.extreme.indiana.edu/soap/xpp/">XML Pull Parser 3.x </a>.
35   * It is very fast for use in SOAP style environments.
36   * </p>
37   * 
38   * @author <a href="mailto:pelle@neubia.com">Pelle Braendgaard </a>
39   * @author <a href="mailto:jstrachan@apache.org">James Strachan </a>
40   * @version $Revision: 1.3 $
41   */
42  public class XPP3Reader {
43      /*** <code>DocumentFactory</code> used to create new document objects */
44      private DocumentFactory factory;
45  
46      /*** <code>XmlPullParser</code> used to parse XML */
47      private XmlPullParser xppParser;
48  
49      /*** <code>XmlPullParser</code> used to parse XML */
50      private XmlPullParserFactory xppFactory;
51  
52      /*** DispatchHandler to call when each <code>Element</code> is encountered */
53      private DispatchHandler dispatchHandler;
54  
55      public XPP3Reader() {
56      }
57  
58      public XPP3Reader(DocumentFactory factory) {
59          this.factory = factory;
60      }
61  
62      /***
63       * <p>
64       * Reads a Document from the given <code>File</code>
65       * </p>
66       * 
67       * @param file
68       *            is the <code>File</code> to read from.
69       * 
70       * @return the newly created Document instance
71       * 
72       * @throws DocumentException
73       *             if an error occurs during parsing.
74       * @throws IOException
75       *             if a URL could not be made for the given File
76       * @throws XmlPullParserException
77       *             DOCUMENT ME!
78       */
79      public Document read(File file) throws DocumentException, IOException,
80              XmlPullParserException {
81          String systemID = file.getAbsolutePath();
82  
83          return read(new BufferedReader(new FileReader(file)), systemID);
84      }
85  
86      /***
87       * <p>
88       * Reads a Document from the given <code>URL</code>
89       * </p>
90       * 
91       * @param url
92       *            <code>URL</code> to read from.
93       * 
94       * @return the newly created Document instance
95       * 
96       * @throws DocumentException
97       *             if an error occurs during parsing.
98       * @throws IOException
99       *             DOCUMENT ME!
100      * @throws XmlPullParserException
101      *             DOCUMENT ME!
102      */
103     public Document read(URL url) throws DocumentException, IOException,
104             XmlPullParserException {
105         String systemID = url.toExternalForm();
106 
107         return read(createReader(url.openStream()), systemID);
108     }
109 
110     /***
111      * <p>
112      * Reads a Document from the given URL or filename.
113      * </p>
114      * 
115      * <p>
116      * If the systemID contains a <code>':'</code> character then it is
117      * assumed to be a URL otherwise its assumed to be a file name. If you want
118      * finer grained control over this mechansim then please explicitly pass in
119      * either a {@link URL}or a {@link File}instance instead of a {@link
120      * String} to denote the source of the document.
121      * </p>
122      * 
123      * @param systemID
124      *            is a URL for a document or a file name.
125      * 
126      * @return the newly created Document instance
127      * 
128      * @throws DocumentException
129      *             if an error occurs during parsing.
130      * @throws IOException
131      *             if a URL could not be made for the given File
132      * @throws XmlPullParserException
133      *             DOCUMENT ME!
134      */
135     public Document read(String systemID) throws DocumentException,
136             IOException, XmlPullParserException {
137         if (systemID.indexOf(':') >= 0) {
138             
139             return read(new URL(systemID));
140         } else {
141             
142             return read(new File(systemID));
143         }
144     }
145 
146     /***
147      * <p>
148      * Reads a Document from the given stream
149      * </p>
150      * 
151      * @param in
152      *            <code>InputStream</code> to read from.
153      * 
154      * @return the newly created Document instance
155      * 
156      * @throws DocumentException
157      *             if an error occurs during parsing.
158      * @throws IOException
159      *             DOCUMENT ME!
160      * @throws XmlPullParserException
161      *             DOCUMENT ME!
162      */
163     public Document read(InputStream in) throws DocumentException, IOException,
164             XmlPullParserException {
165         return read(createReader(in));
166     }
167 
168     /***
169      * <p>
170      * Reads a Document from the given <code>Reader</code>
171      * </p>
172      * 
173      * @param reader
174      *            is the reader for the input
175      * 
176      * @return the newly created Document instance
177      * 
178      * @throws DocumentException
179      *             if an error occurs during parsing.
180      * @throws IOException
181      *             DOCUMENT ME!
182      * @throws XmlPullParserException
183      *             DOCUMENT ME!
184      */
185     public Document read(Reader reader) throws DocumentException, IOException,
186             XmlPullParserException {
187         getXPPParser().setInput(reader);
188 
189         return parseDocument();
190     }
191 
192     /***
193      * <p>
194      * Reads a Document from the given array of characters
195      * </p>
196      * 
197      * @param text
198      *            is the text to parse
199      * 
200      * @return the newly created Document instance
201      * 
202      * @throws DocumentException
203      *             if an error occurs during parsing.
204      * @throws IOException
205      *             DOCUMENT ME!
206      * @throws XmlPullParserException
207      *             DOCUMENT ME!
208      */
209     public Document read(char[] text) throws DocumentException, IOException,
210             XmlPullParserException {
211         getXPPParser().setInput(new CharArrayReader(text));
212 
213         return parseDocument();
214     }
215 
216     /***
217      * <p>
218      * Reads a Document from the given stream
219      * </p>
220      * 
221      * @param in
222      *            <code>InputStream</code> to read from.
223      * @param systemID
224      *            is the URI for the input
225      * 
226      * @return the newly created Document instance
227      * 
228      * @throws DocumentException
229      *             if an error occurs during parsing.
230      * @throws IOException
231      *             DOCUMENT ME!
232      * @throws XmlPullParserException
233      *             DOCUMENT ME!
234      */
235     public Document read(InputStream in, String systemID)
236             throws DocumentException, IOException, XmlPullParserException {
237         return read(createReader(in), systemID);
238     }
239 
240     /***
241      * <p>
242      * Reads a Document from the given <code>Reader</code>
243      * </p>
244      * 
245      * @param reader
246      *            is the reader for the input
247      * @param systemID
248      *            is the URI for the input
249      * 
250      * @return the newly created Document instance
251      * 
252      * @throws DocumentException
253      *             if an error occurs during parsing.
254      * @throws IOException
255      *             DOCUMENT ME!
256      * @throws XmlPullParserException
257      *             DOCUMENT ME!
258      */
259     public Document read(Reader reader, String systemID)
260             throws DocumentException, IOException, XmlPullParserException {
261         Document document = read(reader);
262         document.setName(systemID);
263 
264         return document;
265     }
266 
267     
268     
269     public XmlPullParser getXPPParser() throws XmlPullParserException {
270         if (xppParser == null) {
271             xppParser = getXPPFactory().newPullParser();
272         }
273 
274         return xppParser;
275     }
276 
277     public XmlPullParserFactory getXPPFactory() throws XmlPullParserException {
278         if (xppFactory == null) {
279             xppFactory = XmlPullParserFactory.newInstance();
280         }
281 
282         xppFactory.setNamespaceAware(true);
283 
284         return xppFactory;
285     }
286 
287     public void setXPPFactory(XmlPullParserFactory xPPfactory) {
288         this.xppFactory = xPPfactory;
289     }
290 
291     /***
292      * DOCUMENT ME!
293      * 
294      * @return the <code>DocumentFactory</code> used to create document
295      *         objects
296      */
297     public DocumentFactory getDocumentFactory() {
298         if (factory == null) {
299             factory = DocumentFactory.getInstance();
300         }
301 
302         return factory;
303     }
304 
305     /***
306      * <p>
307      * This sets the <code>DocumentFactory</code> used to create new
308      * documents. This method allows the building of custom DOM4J tree objects
309      * to be implemented easily using a custom derivation of
310      * {@link DocumentFactory}
311      * </p>
312      * 
313      * @param documentFactory
314      *            <code>DocumentFactory</code> used to create DOM4J objects
315      */
316     public void setDocumentFactory(DocumentFactory documentFactory) {
317         this.factory = documentFactory;
318     }
319 
320     /***
321      * Adds the <code>ElementHandler</code> to be called when the specified
322      * path is encounted.
323      * 
324      * @param path
325      *            is the path to be handled
326      * @param handler
327      *            is the <code>ElementHandler</code> to be called by the event
328      *            based processor.
329      */
330     public void addHandler(String path, ElementHandler handler) {
331         getDispatchHandler().addHandler(path, handler);
332     }
333 
334     /***
335      * Removes the <code>ElementHandler</code> from the event based processor,
336      * for the specified path.
337      * 
338      * @param path
339      *            is the path to remove the <code>ElementHandler</code> for.
340      */
341     public void removeHandler(String path) {
342         getDispatchHandler().removeHandler(path);
343     }
344 
345     /***
346      * When multiple <code>ElementHandler</code> instances have been
347      * registered, this will set a default <code>ElementHandler</code> to be
348      * called for any path which does <b>NOT </b> have a handler registered.
349      * 
350      * @param handler
351      *            is the <code>ElementHandler</code> to be called by the event
352      *            based processor.
353      */
354     public void setDefaultHandler(ElementHandler handler) {
355         getDispatchHandler().setDefaultHandler(handler);
356     }
357 
358     
359     
360     protected Document parseDocument() throws DocumentException, IOException,
361             XmlPullParserException {
362         DocumentFactory df = getDocumentFactory();
363         Document document = df.createDocument();
364         Element parent = null;
365         XmlPullParser pp = getXPPParser();
366         pp.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true);
367 
368         while (true) {
369             int type = pp.nextToken();
370 
371             switch (type) {
372                 case XmlPullParser.PROCESSING_INSTRUCTION: {
373                     String text = pp.getText();
374                     int loc = text.indexOf(" ");
375 
376                     if (loc >= 0) {
377                         String target = text.substring(0, loc);
378                         String txt = text.substring(loc + 1);
379                         document.addProcessingInstruction(target, txt);
380                     } else {
381                         document.addProcessingInstruction(text, "");
382                     }
383 
384                     break;
385                 }
386 
387                 case XmlPullParser.COMMENT: {
388                     if (parent != null) {
389                         parent.addComment(pp.getText());
390                     } else {
391                         document.addComment(pp.getText());
392                     }
393 
394                     break;
395                 }
396 
397                 case XmlPullParser.CDSECT: {
398                     if (parent != null) {
399                         parent.addCDATA(pp.getText());
400                     } else {
401                         String msg = "Cannot have text content outside of the "
402                                 + "root document";
403                         throw new DocumentException(msg);
404                     }
405 
406                     break;
407                 }
408 
409                 case XmlPullParser.ENTITY_REF:
410                     break;
411 
412                 case XmlPullParser.END_DOCUMENT:
413                     return document;
414 
415                 case XmlPullParser.START_TAG: {
416                     QName qname = (pp.getPrefix() == null) ? df.createQName(pp
417                             .getName(), pp.getNamespace()) : df.createQName(pp
418                             .getName(), pp.getPrefix(), pp.getNamespace());
419                     Element newElement = df.createElement(qname);
420                     int nsStart = pp.getNamespaceCount(pp.getDepth() - 1);
421                     int nsEnd = pp.getNamespaceCount(pp.getDepth());
422 
423                     for (int i = nsStart; i < nsEnd; i++) {
424                         if (pp.getNamespacePrefix(i) != null) {
425                             newElement.addNamespace(pp.getNamespacePrefix(i),
426                                     pp.getNamespaceUri(i));
427                         }
428                     }
429 
430                     for (int i = 0; i < pp.getAttributeCount(); i++) {
431                         QName qa = (pp.getAttributePrefix(i) == null) ? df
432                                 .createQName(pp.getAttributeName(i)) : df
433                                 .createQName(pp.getAttributeName(i), pp
434                                         .getAttributePrefix(i), pp
435                                         .getAttributeNamespace(i));
436                         newElement.addAttribute(qa, pp.getAttributeValue(i));
437                     }
438 
439                     if (parent != null) {
440                         parent.add(newElement);
441                     } else {
442                         document.add(newElement);
443                     }
444 
445                     parent = newElement;
446 
447                     break;
448                 }
449 
450                 case XmlPullParser.END_TAG: {
451                     if (parent != null) {
452                         parent = parent.getParent();
453                     }
454 
455                     break;
456                 }
457 
458                 case XmlPullParser.TEXT: {
459                     String text = pp.getText();
460 
461                     if (parent != null) {
462                         parent.addText(text);
463                     } else {
464                         String msg = "Cannot have text content outside of the "
465                                 + "root document";
466                         throw new DocumentException(msg);
467                     }
468 
469                     break;
470                 }
471 
472                 default:
473                     break;
474             }
475         }
476     }
477 
478     protected DispatchHandler getDispatchHandler() {
479         if (dispatchHandler == null) {
480             dispatchHandler = new DispatchHandler();
481         }
482 
483         return dispatchHandler;
484     }
485 
486     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
487         this.dispatchHandler = dispatchHandler;
488     }
489 
490     /***
491      * Factory method to create a Reader from the given InputStream.
492      * 
493      * @param in
494      *            DOCUMENT ME!
495      * 
496      * @return DOCUMENT ME!
497      * 
498      * @throws IOException
499      *             DOCUMENT ME!
500      */
501     protected Reader createReader(InputStream in) throws IOException {
502         return new BufferedReader(new InputStreamReader(in));
503     }
504 }
505 
506 
507 
508 
509 
510 
511 
512 
513 
514 
515 
516 
517 
518 
519 
520 
521 
522 
523 
524 
525 
526 
527 
528 
529 
530 
531 
532 
533 
534 
535 
536 
537 
538 
539 
540 
541