|
|||||||||||||||||||
Source file | Conditionals | Statements | Methods | TOTAL | |||||||||||||||
HTMLWriter.java | 29,5% | 37% | 37,5% | 35,5% |
|
1 | /* | |
2 | * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. | |
3 | * | |
4 | * This software is open source. | |
5 | * See the bottom of this file for the licence. | |
6 | */ | |
7 | ||
8 | package org.dom4j.io; | |
9 | ||
10 | import java.io.IOException; | |
11 | import java.io.OutputStream; | |
12 | import java.io.StringWriter; | |
13 | import java.io.UnsupportedEncodingException; | |
14 | import java.io.Writer; | |
15 | import java.util.HashSet; | |
16 | import java.util.Iterator; | |
17 | import java.util.Set; | |
18 | import java.util.Stack; | |
19 | ||
20 | import org.dom4j.Document; | |
21 | import org.dom4j.DocumentHelper; | |
22 | import org.dom4j.Element; | |
23 | import org.dom4j.Entity; | |
24 | import org.dom4j.Node; | |
25 | ||
26 | import org.xml.sax.SAXException; | |
27 | ||
28 | /** | |
29 | * <p> | |
30 | * <code>HTMLWriter</code> takes a DOM4J tree and formats it to a stream as | |
31 | * HTML. This formatter is similar to XMLWriter but it outputs the text of CDATA | |
32 | * and Entity sections rather than the serialised format as in XML, it has an | |
33 | * XHTML mode, it retains whitespace in certain elements such as <PRE>, | |
34 | * and it supports certain elements which have no corresponding close tag such | |
35 | * as for <BR> and <P>. | |
36 | * </p> | |
37 | * | |
38 | * <p> | |
39 | * The OutputFormat passed in to the constructor is checked for isXHTML() and | |
40 | * isExpandEmptyElements(). See {@link OutputFormat OutputFormat} for details. | |
41 | * Here are the rules for <b>this class </b> based on an OutputFormat, "format", | |
42 | * passed in to the constructor: <br/><br/> | |
43 | * | |
44 | * <ul> | |
45 | * <li>If an element is in {@link #getOmitElementCloseSet() | |
46 | * getOmitElementCloseSet}, then it is treated specially: | |
47 | * | |
48 | * <ul> | |
49 | * <li>It never expands, since some browsers treat this as two separate | |
50 | * Horizontal Rules: <HR></HR></li> | |
51 | * <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, then | |
52 | * it has a space before the closing single-tag slash, since Netscape 4.x- | |
53 | * treats this: <HR /> as an element named "HR" with an attribute named | |
54 | * "/", but that's better than when it refuses to recognize this: <hr/> | |
55 | * which it thinks is an element named "HR/".</li> | |
56 | * </ul> | |
57 | * | |
58 | * </li> | |
59 | * <li>If {@link org.dom4j.io.OutputFormat#isXHTML() format.isXHTML()}, all | |
60 | * elements must have either a close element, or be a closed single tag.</li> | |
61 | * <li>If {@link org.dom4j.io.OutputFormat#isExpandEmptyElements() | |
62 | * format.isExpandEmptyElements()} is true, all elements are expanded except | |
63 | * as above.</li> | |
64 | * </ul> | |
65 | * | |
66 | * <b>Examples </b> | |
67 | * </p> | |
68 | * | |
69 | * <p> | |
70 | * </p> | |
71 | * | |
72 | * <p> | |
73 | * If isXHTML == true, CDATA sections look like this: | |
74 | * | |
75 | * <PRE> | |
76 | * | |
77 | * <b><myelement><![CDATA[My data]]></myelement> </b> | |
78 | * | |
79 | * </PRE> | |
80 | * | |
81 | * Otherwise, they look like this: | |
82 | * | |
83 | * <PRE> | |
84 | * | |
85 | * <b><myelement>My data</myelement> </b> | |
86 | * | |
87 | * </PRE> | |
88 | * | |
89 | * </p> | |
90 | * | |
91 | * <p> | |
92 | * Basically, {@link OutputFormat#isXHTML() OutputFormat.isXHTML()} == | |
93 | * <code>true</code> will produce valid XML, while {@link | |
94 | * org.dom4j.io.OutputFormat#isExpandEmptyElements() | |
95 | * format.isExpandEmptyElements()} determines whether empty elements are | |
96 | * expanded if isXHTML is true, excepting the special HTML single tags. | |
97 | * </p> | |
98 | * | |
99 | * <p> | |
100 | * Also, HTMLWriter handles tags whose contents should be preformatted, that is, | |
101 | * whitespace-preserved. By default, this set includes the tags <PRE>, | |
102 | * <SCRIPT>, <STYLE>, and <TEXTAREA>, case insensitively. It | |
103 | * does not include <IFRAME>. Other tags, such as <CODE>, | |
104 | * <KBD>, <TT>, <VAR>, are usually rendered in a different | |
105 | * font in most browsers, but don't preserve whitespace, so they also don't | |
106 | * appear in the default list. HTML Comments are always whitespace-preserved. | |
107 | * However, the parser you use may store comments with linefeed-only text nodes | |
108 | * (\n) even if your platform uses another line.separator character, and | |
109 | * HTMLWriter outputs Comment nodes exactly as the DOM is set up by the parser. | |
110 | * See examples and discussion here: {@link #setPreformattedTags(java.util.Set) | |
111 | * setPreformattedTags} | |
112 | * </p> | |
113 | * | |
114 | * <p> | |
115 | * <b>Examples </b> | |
116 | * </p> | |
117 | * <blockquote> | |
118 | * <p> | |
119 | * <b>Pretty Printing </b> | |
120 | * </p> | |
121 | * | |
122 | * <p> | |
123 | * This example shows how to pretty print a string containing a valid HTML | |
124 | * document to a string. You can also just call the static methods of this | |
125 | * class: <br> | |
126 | * {@link #prettyPrintHTML(String) prettyPrintHTML(String)} or <br> | |
127 | * {@link #prettyPrintHTML(String,boolean,boolean,boolean,boolean) | |
128 | * prettyPrintHTML(String,boolean,boolean,boolean,boolean)} or, <br> | |
129 | * {@link #prettyPrintXHTML(String) prettyPrintXHTML(String)} for XHTML (note | |
130 | * the X) | |
131 | * </p> | |
132 | * | |
133 | * <pre> | |
134 | * String testPrettyPrint(String html) { | |
135 | * StringWriter sw = new StringWriter(); | |
136 | * OutputFormat format = OutputFormat.createPrettyPrint(); | |
137 | * // These are the default values for createPrettyPrint, | |
138 | * // so you needn't set them: | |
139 | * // format.setNewlines(true); | |
140 | * // format.setTrimText(true); | |
141 | * format.setXHTML(true); | |
142 | * HTMLWriter writer = new HTMLWriter(sw, format); | |
143 | * Document document = DocumentHelper.parseText(html); | |
144 | * writer.write(document); | |
145 | * writer.flush(); | |
146 | * return sw.toString(); | |
147 | * } | |
148 | * </pre> | |
149 | * | |
150 | * <p> | |
151 | * This example shows how to create a "squeezed" document, but one that will | |
152 | * work in browsers even if the browser line length is limited. No newlines are | |
153 | * included, no extra whitespace at all, except where it is required by | |
154 | * {@link #setPreformattedTags(java.util.Set) setPreformattedTags}. | |
155 | * </p> | |
156 | * | |
157 | * <pre> | |
158 | * String testCrunch(String html) { | |
159 | * StringWriter sw = new StringWriter(); | |
160 | * OutputFormat format = OutputFormat.createPrettyPrint(); | |
161 | * format.setNewlines(false); | |
162 | * format.setTrimText(true); | |
163 | * format.setIndent(""); | |
164 | * format.setXHTML(true); | |
165 | * format.setExpandEmptyElements(false); | |
166 | * format.setNewLineAfterNTags(20); | |
167 | * org.dom4j.io.HTMLWriter writer = new HTMLWriter(sw, format); | |
168 | * org.dom4j.Document document = DocumentHelper.parseText(html); | |
169 | * writer.write(document); | |
170 | * writer.flush(); | |
171 | * return sw.toString(); | |
172 | * } | |
173 | * </pre> | |
174 | * | |
175 | * </blockquote> | |
176 | * | |
177 | * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a> | |
178 | * @author Laramie Crocker | |
179 | * @version $Revision: 1.21 $ | |
180 | */ | |
181 | public class HTMLWriter extends XMLWriter { | |
/** Value of the "line.separator" system property; written for "\n" text inside preformatted elements and for the every-N-tags line break. */
182 | private static String lineSeparator = System.getProperty("line.separator"); | |
183 | ||
/** Default set of upper-case tag names whose content is treated as preformatted: PRE, SCRIPT, STYLE and TEXTAREA. */
184 | protected static final HashSet DEFAULT_PREFORMATTED_TAGS; | |
185 | ||
186 | static { | |
187 | // If you change this list, update the javadoc examples, above in the | |
188 | // class javadoc, in writeElement, and in setPreformattedTags(). | |
189 | 1 | DEFAULT_PREFORMATTED_TAGS = new HashSet(); |
190 | 1 | DEFAULT_PREFORMATTED_TAGS.add("PRE"); |
191 | 1 | DEFAULT_PREFORMATTED_TAGS.add("SCRIPT"); |
192 | 1 | DEFAULT_PREFORMATTED_TAGS.add("STYLE"); |
193 | 1 | DEFAULT_PREFORMATTED_TAGS.add("TEXTAREA"); |
194 | } | |
195 | ||
/** Format used by the constructors that take no OutputFormat; text trimming and declaration suppression are switched on below. */
196 | protected static final OutputFormat DEFAULT_HTML_FORMAT; | |
197 | ||
198 | static { | |
// NOTE(review): the two constructor arguments are presumably (indent, newlines) - confirm against OutputFormat.
199 | 1 | DEFAULT_HTML_FORMAT = new OutputFormat(" ", true); |
200 | 1 | DEFAULT_HTML_FORMAT.setTrimText(true); |
201 | 1 | DEFAULT_HTML_FORMAT.setSuppressDeclaration(true); |
202 | } | |
203 | ||
/** Holds one FormatState per preformatted element currently being written; non-empty means "inside a preformatted element". */
204 | private Stack formatStack = new Stack(); | |
205 | ||
/** The last text passed to writeString that was not a lone "\n"; read by writeElement to line up preformatted tags. */
206 | private String lastText = ""; | |
207 | ||
/** Number of elements passed to writeElement so far; drives the every-N-tags line break. */
208 | private int tagsOuput = 0; | |
209 | ||
210 | // legal values are 0+, but -1 signifies lazy initialization. | |
211 | private int newLineAfterNTags = -1; | |
212 | ||
/** Upper-case tag names currently treated as preformatted; starts out as the shared default set and is replaced by setPreformattedTags. */
213 | private HashSet preformattedTags = DEFAULT_PREFORMATTED_TAGS; | |
214 | ||
215 | /** | |
216 | * Used to store the qualified element names which should have no close | |
217 | * element tag. Created lazily by internalGetOmitElementCloseSet(). | |
218 | */ | |
219 | private HashSet omitElementCloseSet; | |
220 | ||
/** Creates a writer on the given Writer using DEFAULT_HTML_FORMAT. */
221 | 1 | public HTMLWriter(Writer writer) { |
222 | 1 | super(writer, DEFAULT_HTML_FORMAT); |
223 | } | |
224 | ||
/** Creates a writer on the given Writer using the supplied format. */
225 | 5 | public HTMLWriter(Writer writer, OutputFormat format) { |
226 | 5 | super(writer, format); |
227 | } | |
228 | ||
/** Delegates to the superclass constructor that takes only a format, passing DEFAULT_HTML_FORMAT. */
229 | 0 | public HTMLWriter() throws UnsupportedEncodingException { |
230 | 0 | super(DEFAULT_HTML_FORMAT); |
231 | } | |
232 | ||
/** Delegates to the superclass constructor that takes only a format, passing the supplied format. */
233 | 0 | public HTMLWriter(OutputFormat format) throws UnsupportedEncodingException { |
234 | 0 | super(format); |
235 | } | |
236 | ||
/** Creates a writer on the given OutputStream using DEFAULT_HTML_FORMAT. */
237 | 0 | public HTMLWriter(OutputStream out) throws UnsupportedEncodingException { |
238 | 0 | super(out, DEFAULT_HTML_FORMAT); |
239 | } | |
240 | ||
/** Creates a writer on the given OutputStream using the supplied format. */
241 | 0 | public HTMLWriter(OutputStream out, OutputFormat format) |
242 | throws UnsupportedEncodingException { | |
243 | 0 | super(out, format); |
244 | } | |
245 | ||
/** Intentionally empty: this writer emits nothing when a CDATA section starts. */
246 | 0 | public void startCDATA() throws SAXException { |
247 | } | |
248 | ||
/** Intentionally empty: this writer emits nothing when a CDATA section ends. */
249 | 0 | public void endCDATA() throws SAXException { |
250 | } | |
251 | ||
252 | // Overridden methods | |
253 | // added isXHTML() stuff so you get the CDATA brackets if you desire. | |
// In XHTML mode the superclass writes the CDATA; otherwise only the raw text is written.
254 | 1 | protected void writeCDATA(String text) throws IOException { |
255 | // XXX: Should we escape entities? | |
256 | // writer.write( escapeElementEntities( text ) ); | |
257 | 1 | if (getOutputFormat().isXHTML()) { |
258 | 0 | super.writeCDATA(text); |
259 | } else { | |
260 | 1 | writer.write(text); |
261 | } | |
262 | ||
263 | 1 | lastOutputNodeType = Node.CDATA_SECTION_NODE; |
264 | } | |
265 | ||
// Writes the entity's text (entity.getText()) directly to the writer and records the node type.
266 | 0 | protected void writeEntity(Entity entity) throws IOException { |
267 | 0 | writer.write(entity.getText()); |
268 | 0 | lastOutputNodeType = Node.ENTITY_REFERENCE_NODE; |
269 | } | |
270 | ||
// Intentionally empty: this override writes no declaration.
271 | 3 | protected void writeDeclaration() throws IOException { |
272 | } | |
273 | ||
274 | 4 | protected void writeString(String text) throws IOException { |
275 | /* | |
276 | * DOM stores \n at the end of text nodes that are newlines. This is | |
277 | * significant if we are in a PRE section. However, we only want to | |
278 | * output the system line.separator, not \n. This is a little brittle, | |
279 | * but this function appears to be called with these lineseparators as a | |
280 | * separate TEXT_NODE. If we are in a preformatted section, output the | |
281 | * right line.separator, otherwise ditch. If the single \n character is | |
282 | * not the text, then do the super thing to output the text. | |
283 | * | |
284 | * Also, we store the last text that was not a \n since it may be used | |
285 | * by writeElement in this class to line up preformatted tags. | |
286 | */ | |
287 | 4 | if (text.equals("\n")) { |
288 | 0 | if (!formatStack.empty()) { |
289 | 0 | super.writeString(lineSeparator); |
290 | } | |
291 | ||
292 | 0 | return; |
293 | } | |
294 | ||
295 | 4 | lastText = text; |
296 | ||
// An empty stack means we are outside any preformatted element, so the text is trimmed.
297 | 4 | if (formatStack.empty()) { |
298 | 4 | super.writeString(text.trim()); |
299 | } else { | |
300 | 0 | super.writeString(text); |
301 | } | |
302 | } | |
303 | ||
304 | /** | |
305 | * Overridden method to not close certain element names to avoid weird | |
306 | * behaviour from browsers for versions up to 5.x | |
307 | * | |
308 | * @param qualifiedName | |
309 | * the qualified name of the element whose close tag is to be written; nothing is written if omitElementClose(qualifiedName) is true | |
310 | * | |
311 | * @throws IOException | |
312 | * propagated from the superclass writeClose | |
313 | */ | |
314 | 0 | protected void writeClose(String qualifiedName) throws IOException { |
315 | 0 | if (!omitElementClose(qualifiedName)) { |
316 | 0 | super.writeClose(qualifiedName); |
317 | } | |
318 | } | |
319 | ||
// Ends an empty element: " />" (XHTML) or ">" (HTML) for tags in the omit-close set, otherwise the superclass decides.
320 | 2 | protected void writeEmptyElementClose(String qualifiedName) |
321 | throws IOException { | |
322 | 2 | if (getOutputFormat().isXHTML()) { |
323 | // xhtml, always check with format object whether to expand or not. | |
324 | 0 | if (omitElementClose(qualifiedName)) { |
325 | // it was a special omit tag, do it the XHTML way: "<br/>", | |
326 | // ignoring the expansion option, since <br></br> is OK XML, | |
327 | // but produces twice the linefeeds desired in the browser. | |
328 | // for netscape 4.7, though all are fine with it, write a space | |
329 | // before the close slash. | |
330 | 0 | writer.write(" />"); |
331 | } else { | |
332 | 0 | super.writeEmptyElementClose(qualifiedName); |
333 | } | |
334 | } else { | |
335 | // html, not xhtml | |
336 | 2 | if (omitElementClose(qualifiedName)) { |
337 | // it was a special omit tag, do it the old html way: "<br>". | |
338 | 1 | writer.write(">"); |
339 | } else { | |
340 | // it was NOT a special omit tag, check with format object | |
341 | // whether to expand or not. | |
342 | 1 | super.writeEmptyElementClose(qualifiedName); |
343 | } | |
344 | } | |
345 | } | |
346 | ||
// Returns true if the upper-cased qualified name is in the omit-close set (lookup is therefore case-insensitive).
347 | 2 | protected boolean omitElementClose(String qualifiedName) { |
348 | 2 | return internalGetOmitElementCloseSet().contains( |
349 | qualifiedName.toUpperCase()); | |
350 | } | |
351 | ||
// Returns the live omit-close set, creating and filling it via loadOmitElementCloseSet on first use.
352 | 2 | private HashSet internalGetOmitElementCloseSet() { |
353 | 2 | if (omitElementCloseSet == null) { |
354 | 2 | omitElementCloseSet = new HashSet(); |
355 | 2 | loadOmitElementCloseSet(omitElementCloseSet); |
356 | } | |
357 | ||
358 | 2 | return omitElementCloseSet; |
359 | } | |
360 | ||
361 | // If you change this, change the javadoc for getOmitElementCloseSet. | |
// Adds the default omit-close tag names (upper case) to the given set.
362 | 2 | protected void loadOmitElementCloseSet(Set set) { |
363 | 2 | set.add("AREA"); |
364 | 2 | set.add("BASE"); |
365 | 2 | set.add("BR"); |
366 | 2 | set.add("COL"); |
367 | 2 | set.add("HR"); |
368 | 2 | set.add("IMG"); |
369 | 2 | set.add("INPUT"); |
370 | 2 | set.add("LINK"); |
371 | 2 | set.add("META"); |
372 | 2 | set.add("P"); |
373 | 2 | set.add("PARAM"); |
374 | } | |
375 | ||
376 | // let the people see the set, but not modify it. | |
377 | ||
378 | /** | |
379 | * A clone of the Set of elements that can have their close-tags omitted. By | |
380 | * default it should be "AREA", "BASE", "BR", "COL", "HR", "IMG", "INPUT", | |
381 | * "LINK", "META", "P", "PARAM" | |
382 | * | |
383 | * @return A clone of the Set. | |
384 | */ | |
385 | 0 | public Set getOmitElementCloseSet() { |
386 | 0 | return (Set) (internalGetOmitElementCloseSet().clone()); |
387 | } | |
388 | ||
389 | /** | |
390 | * To use the empty set, pass an empty Set, or null: | |
391 | * | |
392 | * <pre> | |
393 | * | |
394 | * | |
395 | * setOmitElementCloseSet(new HashSet()); | |
396 | * or | |
397 | * setOmitElementCloseSet(null); | |
398 | * | |
399 | * | |
400 | * </pre> | |
401 | * | |
402 | * @param newSet | |
403 | * the tag names to use; each non-null entry is stored as toString().toUpperCase(), null entries are skipped, and a null set yields the empty set | |
404 | */ | |
405 | 0 | public void setOmitElementCloseSet(Set newSet) { |
406 | // resets, and safely empties it out if newSet is null. | |
407 | 0 | omitElementCloseSet = new HashSet(); |
408 | ||
409 | 0 | if (newSet != null) { |
// NOTE(review): this second allocation repeats the one made just above.
410 | 0 | omitElementCloseSet = new HashSet(); |
411 | ||
412 | 0 | Object aTag; |
413 | 0 | Iterator iter = newSet.iterator(); |
414 | ||
415 | 0 | while (iter.hasNext()) { |
416 | 0 | aTag = iter.next(); |
417 | ||
418 | 0 | if (aTag != null) { |
419 | 0 | omitElementCloseSet.add(aTag.toString().toUpperCase()); |
420 | } | |
421 | } | |
422 | } | |
423 | } | |
424 | ||
425 | /** | |
426 | * Returns a clone of the current set of preformatted tag names. | |
427 | * | |
428 | * @see #setPreformattedTags(java.util.Set) setPreformattedTags | |
429 | */ | |
428 | 0 | public Set getPreformattedTags() { |
429 | 0 | return (Set) (preformattedTags.clone()); |
430 | } | |
431 | ||
432 | /** | |
433 | * <p> | |
434 | * Override the default set, which includes PRE, SCRIPT, STYLE, and | |
435 | * TEXTAREA, case insensitively. | |
436 | * </p> | |
437 | * | |
438 | * <p> | |
439 | * <b>Setting Preformatted Tags </b> | |
440 | * </p> | |
441 | * | |
442 | * <p> | |
443 | * Pass in a Set of Strings, one for each tag name that should be treated | |
444 | * like a PRE tag. You may pass in null or an empty Set to assign the empty | |
445 | * set, in which case no tags will be treated as preformatted, except that | |
446 | * HTML Comments will continue to be preformatted. If a tag is included in | |
447 | * the set of preformatted tags, all whitespace within the tag will be | |
448 | * preserved, including whitespace on the same line preceding the close tag. | |
449 | * This will generally make the close tag not line up with the start tag, | |
450 | * but it preserves the intention of the whitespace within the tag. | |
451 | * </p> | |
452 | * | |
453 | * <p> | |
454 | * The browser considers leading whitespace before the close tag to be | |
455 | * significant, but leading whitespace before the open tag to be | |
456 | * insignificant. For example, if the HTML author doesn't put the close | |
457 | * TEXTAREA tag flush to the left margin, then the TEXTAREA control in the | |
458 | * browser will have spaces on the last line inside the control. This may be | |
459 | * the HTML author's intent. Similarly, in a PRE, the browser treats a | |
460 | * flushed left close PRE tag as different from a close tag with leading | |
461 | * whitespace. Again, this must be left up to the HTML author. | |
462 | * </p> | |
463 | * | |
464 | * <p> | |
465 | * <b>Examples </b> | |
466 | * </p> | |
467 | * <blockquote> | |
468 | * <p> | |
469 | * Here is an example of how you can set the PreformattedTags list using | |
470 | * setPreformattedTags to include IFRAME, as well as the default set, if you | |
471 | * have an instance of this class named myHTMLWriter: | |
472 | * | |
473 | * <pre> | |
474 | * Set current = myHTMLWriter.getPreformattedTags(); | |
475 | * current.add("IFRAME"); | |
476 | * myHTMLWriter.setPreformattedTags(current); | |
477 | * | |
478 | * //The set is now <b>PRE, SCRIPT, STYLE, TEXTAREA, IFRAME</b> | |
479 | * | |
480 | * | |
481 | * </pre> | |
482 | * | |
483 | * Similarly, you can simply replace it with your own: | |
484 | * | |
485 | * <pre> | |
486 | * | |
487 | * | |
488 | * HashSet newset = new HashSet(); | |
489 | * newset.add("PRE"); | |
490 | * newset.add("TEXTAREA"); | |
491 | * myHTMLWriter.setPreformattedTags(newset); | |
492 | * | |
493 | * //The set is now <b>{PRE, TEXTAREA}</b> | |
494 | * | |
495 | * | |
496 | * </pre> | |
497 | * | |
498 | * You can remove all tags from the preformatted tags list, with an empty | |
499 | * set, like this: | |
500 | * | |
501 | * <pre> | |
502 | * | |
503 | * | |
504 | * myHTMLWriter.setPreformattedTags(new HashSet()); | |
505 | * | |
506 | * //The set is now <b>{}</b> | |
507 | * | |
508 | * | |
509 | * </pre> | |
510 | * | |
511 | * or with null, like this: | |
512 | * | |
513 | * <pre> | |
514 | * | |
515 | * | |
516 | * myHTMLWriter.setPreformattedTags(null); | |
517 | * | |
518 | * //The set is now <b>{}</b> | |
519 | * | |
520 | * | |
521 | * </pre> | |
522 | * | |
523 | * </p> | |
524 | * </blockquote> | |
525 | * | |
526 | * @param newSet | |
527 | * the tag names to treat as preformatted; each non-null entry is stored as toString().toUpperCase(), null entries are skipped, and a null set yields the empty set | |
528 | */ | |
529 | 0 | public void setPreformattedTags(Set newSet) { |
530 | // no fancy merging, just set it, assuming they did a | |
531 | // getPreformattedTags() first if they wanted to preserve the default | |
532 | // set. | |
533 | // resets, and safely empties it out if newSet is null. | |
534 | 0 | preformattedTags = new HashSet(); |
535 | ||
536 | 0 | if (newSet != null) { |
537 | 0 | Object aTag; |
538 | 0 | Iterator iter = newSet.iterator(); |
539 | ||
540 | 0 | while (iter.hasNext()) { |
541 | 0 | aTag = iter.next(); |
542 | ||
543 | 0 | if (aTag != null) { |
544 | 0 | preformattedTags.add(aTag.toString().toUpperCase()); |
545 | } | |
546 | } | |
547 | } | |
548 | } | |
549 | ||
550 | /** | |
551 | * Tests whether an element name is in the set of preformatted tags. | |
552 | * | |
553 | * @param qualifiedName | |
554 | * the element name to look up; it is upper-cased before the lookup | |
555 | * | |
556 | * @return true if the qualifiedName passed in matched (case-insensitively) | |
557 | * a tag in the preformattedTags set, or false if not found or if | |
558 | * the set is empty or null. | |
559 | * | |
560 | * @see #setPreformattedTags(java.util.Set) setPreformattedTags | |
561 | */ | |
562 | 9 | public boolean isPreformattedTag(String qualifiedName) { |
563 | // Defensive null check: within this class setPreformattedTags(null) | |
564 | // assigns an empty set rather than null, so no tags match in that case. | |
565 | 9 | return (preformattedTags != null) |
566 | && (preformattedTags.contains(qualifiedName.toUpperCase())); | |
567 | } | |
568 | ||
569 | /** | |
570 | * This override handles any elements that should not remove whitespace, | |
571 | * such as <PRE>, <SCRIPT>, <STYLE>, and <TEXTAREA>. | |
572 | * Note: the close tags won't line up with the open tag, but we can't alter | |
573 | * that. See javadoc note at setPreformattedTags. | |
574 | * | |
575 | * @param element | |
576 | * the element to write; its qualified name decides whether it is handled as preformatted | |
577 | * | |
578 | * @throws IOException | |
579 | * When the stream could not be written to. | |
580 | * | |
581 | * @see #setPreformattedTags(java.util.Set) setPreformattedTags | |
582 | */ | |
583 | 9 | protected void writeElement(Element element) throws IOException { |
584 | 9 | if (newLineAfterNTags == -1) { // lazy initialization check |
585 | 3 | lazyInitNewLinesAfterNTags(); |
586 | } | |
587 | ||
// Emit a line separator before every newLineAfterNTags-th element (never before the first one).
588 | 9 | if (newLineAfterNTags > 0) { |
589 | 0 | if ((tagsOuput > 0) && ((tagsOuput % newLineAfterNTags) == 0)) { |
590 | 0 | super.writer.write(lineSeparator); |
591 | } | |
592 | } | |
593 | ||
594 | 9 | tagsOuput++; |
595 | ||
596 | 9 | String qualifiedName = element.getQualifiedName(); |
597 | 9 | String saveLastText = lastText; |
// NOTE(review): 'size' is never read in this method.
598 | 9 | int size = element.nodeCount(); |
599 | ||
600 | 9 | if (isPreformattedTag(qualifiedName)) { |
601 | 0 | OutputFormat currentFormat = getOutputFormat(); |
602 | 0 | boolean saveNewlines = currentFormat.isNewlines(); |
603 | 0 | boolean saveTrimText = currentFormat.isTrimText(); |
604 | 0 | String currentIndent = currentFormat.getIndent(); |
605 | ||
606 | // You could have nested PREs, or SCRIPTS within PRE... etc., | |
607 | // therefore use push and pop. | |
608 | 0 | formatStack.push(new FormatState(saveNewlines, saveTrimText, |
609 | currentIndent)); | |
610 | ||
611 | 0 | try { |
612 | // do this manually, since it won't be done while outputting | |
613 | // the tag. | |
614 | 0 | super.writePrintln(); |
615 | ||
616 | 0 | if ((saveLastText.trim().length() == 0) |
617 | && (currentIndent != null) | |
618 | && (currentIndent.length() > 0)) { | |
619 | // We are indenting, but we want to line up with the close | |
620 | // tag. lastText was the indent (whitespace, no \n) before | |
621 | // the preformatted start tag. So write it out instead of | |
622 | // the current indent level. This makes it line up with its | |
623 | // close tag. | |
624 | 0 | super.writer.write(justSpaces(saveLastText)); |
625 | } | |
626 | ||
627 | // actually, newlines are handled in this class by writeString, | |
628 | // depending on if the stack is empty. | |
629 | 0 | currentFormat.setNewlines(false); |
630 | 0 | currentFormat.setTrimText(false); |
631 | 0 | currentFormat.setIndent(""); |
632 | ||
633 | // This line is the recursive one: | |
634 | 0 | super.writeElement(element); |
635 | } finally { | |
// Restore the format settings saved before this preformatted element, even if writing failed.
636 | 0 | FormatState state = (FormatState) formatStack.pop(); |
637 | 0 | currentFormat.setNewlines(state.isNewlines()); |
638 | 0 | currentFormat.setTrimText(state.isTrimText()); |
639 | 0 | currentFormat.setIndent(state.getIndent()); |
640 | } | |
641 | } else { | |
642 | 9 | super.writeElement(element); |
643 | } | |
644 | } | |
645 | ||
// Returns a copy of text with every '\r' and '\n' removed; all other characters are kept unchanged.
646 | 0 | private String justSpaces(String text) { |
647 | 0 | int size = text.length(); |
648 | 0 | StringBuffer res = new StringBuffer(size); |
649 | 0 | char c; |
650 | ||
651 | 0 | for (int i = 0; i < size; i++) { |
652 | 0 | c = text.charAt(i); |
653 | ||
654 | 0 | switch (c) { |
655 | 0 | case '\r': |
656 | 0 | case '\n': |
657 | ||
658 | 0 | continue; |
659 | ||
660 | 0 | default: |
661 | 0 | res.append(c); |
662 | } | |
663 | } | |
664 | ||
665 | 0 | return res.toString(); |
666 | } | |
667 | ||
// Resolves the -1 "not yet initialised" marker: 0 when the format already emits newlines, otherwise the format's getNewLineAfterNTags() value.
668 | 3 | private void lazyInitNewLinesAfterNTags() { |
669 | 3 | if (getOutputFormat().isNewlines()) { |
670 | // don't bother, newlines are going to happen anyway. | |
671 | 2 | newLineAfterNTags = 0; |
672 | } else { | |
673 | 1 | newLineAfterNTags = getOutputFormat().getNewLineAfterNTags(); |
674 | } | |
675 | } | |
676 | ||
677 | // Convenience methods, static, with bunch-o-defaults | |
678 | ||
679 | /** | |
680 | * Convenience method to just get a String result. Delegates to the | |
681 | * five-argument overload with newlines=true, trim=true, isXHTML=false, expandEmpty=true. | |
682 | * @param html | |
683 | * the markup to format; it is parsed with DocumentHelper.parseText | |
684 | * | |
685 | * @return a pretty printed String from the source string, preserving | |
686 | * whitespace in the defaultPreformattedTags set, and leaving the | |
687 | * close tags off of the default omitElementCloseSet set. Use one of | |
688 | * the write methods if you want stream output. | |
689 | * | |
690 | * @throws java.io.IOException | |
691 | * @throws java.io.UnsupportedEncodingException | |
692 | * @throws org.dom4j.DocumentException | |
693 | */ | |
694 | 0 | public static String prettyPrintHTML(String html) |
695 | throws java.io.IOException, java.io.UnsupportedEncodingException, | |
696 | org.dom4j.DocumentException { | |
697 | 0 | return prettyPrintHTML(html, true, true, false, true); |
698 | } | |
699 | ||
700 | /** | |
701 | * Convenience method to just get a String result, but <b>As XHTML </b>. | |
702 | * Delegates to prettyPrintHTML with newlines=true, trim=true, isXHTML=true, expandEmpty=false. | |
703 | * @param html | |
704 | * the markup to format; it is parsed with DocumentHelper.parseText | |
705 | * | |
706 | * @return a pretty printed String from the source string, preserving | |
707 | * whitespace in the defaultPreformattedTags set, but conforming to | |
708 | * XHTML: no close tags are omitted (though if empty, they will be | |
709 | * converted to XHTML empty tags: <HR/>). Use one of the write | |
710 | * methods if you want stream output. | |
711 | * | |
712 | * @throws java.io.IOException | |
713 | * @throws java.io.UnsupportedEncodingException | |
714 | * @throws org.dom4j.DocumentException | |
715 | */ | |
716 | 0 | public static String prettyPrintXHTML(String html) |
717 | throws java.io.IOException, java.io.UnsupportedEncodingException, | |
718 | org.dom4j.DocumentException { | |
719 | 0 | return prettyPrintHTML(html, true, true, true, false); |
720 | } | |
721 | ||
722 | /** | |
723 | * Parses the given markup and writes it to a String with an HTMLWriter configured from the flags below. | |
724 | * | |
725 | * @param html | |
726 | * the markup to format; it is parsed with DocumentHelper.parseText | |
727 | * @param newlines | |
728 | * value passed to OutputFormat.setNewlines | |
729 | * @param trim | |
730 | * value passed to OutputFormat.setTrimText | |
731 | * @param isXHTML | |
732 | * value passed to OutputFormat.setXHTML | |
733 | * @param expandEmpty | |
734 | * value passed to OutputFormat.setExpandEmptyElements | |
735 | * | |
736 | * @return a pretty printed String from the source string, preserving | |
737 | * whitespace in the defaultPreformattedTags set, and leaving the | |
738 | * close tags off of the default omitElementCloseSet set. This | |
739 | * override allows you to specify various formatter options. Use one | |
740 | * of the write methods if you want stream output. | |
741 | * | |
742 | * @throws java.io.IOException | |
743 | * @throws java.io.UnsupportedEncodingException | |
744 | * @throws org.dom4j.DocumentException | |
745 | */ | |
746 | 0 | public static String prettyPrintHTML(String html, boolean newlines, |
747 | boolean trim, boolean isXHTML, boolean expandEmpty) | |
748 | throws java.io.IOException, java.io.UnsupportedEncodingException, | |
749 | org.dom4j.DocumentException { | |
750 | 0 | StringWriter sw = new StringWriter(); |
751 | 0 | OutputFormat format = OutputFormat.createPrettyPrint(); |
752 | 0 | format.setNewlines(newlines); |
753 | 0 | format.setTrimText(trim); |
754 | 0 | format.setXHTML(isXHTML); |
755 | 0 | format.setExpandEmptyElements(expandEmpty); |
756 | ||
757 | 0 | HTMLWriter writer = new HTMLWriter(sw, format); |
758 | 0 | Document document = DocumentHelper.parseText(html); |
759 | 0 | writer.write(document); |
760 | 0 | writer.flush(); |
761 | ||
762 | 0 | return sw.toString(); |
763 | } | |
764 | ||
765 | // Allows us to store the current state of the format (newlines, trimText, | |
766 | // indent) in this struct on the formatStack. | |
767 | private class FormatState { | |
768 | private boolean newlines = false; | |
769 | ||
770 | private boolean trimText = false; | |
771 | ||
772 | private String indent = ""; | |
773 | ||
774 | 0 | public FormatState(boolean newLines, boolean trimText, String indent) { |
775 | 0 | this.newlines = newLines; |
776 | 0 | this.trimText = trimText; |
777 | 0 | this.indent = indent; |
778 | } | |
779 | ||
780 | 0 | public boolean isNewlines() { |
781 | 0 | return newlines; |
782 | } | |
783 | ||
784 | 0 | public boolean isTrimText() { |
785 | 0 | return trimText; |
786 | } | |
787 | ||
788 | 0 | public String getIndent() { |
789 | 0 | return indent; |
790 | } | |
791 | } | |
792 | } | |
793 | ||
794 | /* | |
795 | * <html> <head> <title>My Title </title> <style> .foo { text-align: Right; } | |
796 | * </style> <script> function mojo(){ return "bar"; } </script> <script | |
797 | * language="JavaScript"> <!-- //this is the canonical javascript hiding. | |
798 | * function foo(){ return "foo"; } //--> </script> </head> <!-- this is a | |
799 | * comment --> <body bgcolor="#A4BFDD" mojo="&"> entities:   & | |
800 | * " < > %23 <p></p> <mojo> </mojo> <foo /> <table border="1"> <tr> | |
801 | * <td><pre> line0 <hr /> line1 <b>line2, should line up, indent-wise </b> line | |
802 | * 3 line 4 </pre></td><td></td></tr> </table> <myCDATAElement> <![CDATA[My | |
803 | * data]]> </myCDATAElement> </body> </html> | |
804 | */ | |
805 | ||
806 | /* | |
807 | * Redistribution and use of this software and associated documentation | |
808 | * ("Software"), with or without modification, are permitted provided that the | |
809 | * following conditions are met: | |
810 | * | |
811 | * 1. Redistributions of source code must retain copyright statements and | |
812 | * notices. Redistributions must also contain a copy of this document. | |
813 | * | |
814 | * 2. Redistributions in binary form must reproduce the above copyright notice, | |
815 | * this list of conditions and the following disclaimer in the documentation | |
816 | * and/or other materials provided with the distribution. | |
817 | * | |
818 | * 3. The name "DOM4J" must not be used to endorse or promote products derived | |
819 | * from this Software without prior written permission of MetaStuff, Ltd. For | |
820 | * written permission, please contact dom4j-info@metastuff.com. | |
821 | * | |
822 | * 4. Products derived from this Software may not be called "DOM4J" nor may | |
823 | * "DOM4J" appear in their names without prior written permission of MetaStuff, | |
824 | * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd. | |
825 | * | |
826 | * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org | |
827 | * | |
828 | * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND | |
829 | * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
830 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
831 | * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE | |
832 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
833 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
834 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
835 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
836 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
837 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
838 | * POSSIBILITY OF SUCH DAMAGE. | |
839 | * | |
840 | * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved. | |
841 | */ |
|