diff options
author | Leif Åstrand <leif@vaadin.com> | 2012-09-05 19:50:56 +0300 |
---|---|---|
committer | Leif Åstrand <leif@vaadin.com> | 2012-09-05 19:51:15 +0300 |
commit | 7d25670284b11c7c62ba25183f265227cb3dba83 (patch) | |
tree | c8e76eb70dd3cdd5cf59a99419635f2188b25c24 /server/src/org/jsoup/examples/HtmlToPlainText.java | |
parent | 1d0c96de9595c243d88471476d21e5f248be63f7 (diff) | |
download | vaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.tar.gz vaadin-framework-7d25670284b11c7c62ba25183f265227cb3dba83.zip |
Reformat project
Diffstat (limited to 'server/src/org/jsoup/examples/HtmlToPlainText.java')
-rw-r--r-- | server/src/org/jsoup/examples/HtmlToPlainText.java | 63 |
1 files changed, 41 insertions, 22 deletions
diff --git a/server/src/org/jsoup/examples/HtmlToPlainText.java b/server/src/org/jsoup/examples/HtmlToPlainText.java index 8f563e9608..53e485be34 100644 --- a/server/src/org/jsoup/examples/HtmlToPlainText.java +++ b/server/src/org/jsoup/examples/HtmlToPlainText.java @@ -1,5 +1,7 @@ package org.jsoup.examples; +import java.io.IOException; + import org.jsoup.Jsoup; import org.jsoup.helper.StringUtil; import org.jsoup.helper.Validate; @@ -10,15 +12,15 @@ import org.jsoup.nodes.TextNode; import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; -import java.io.IOException; - /** - * HTML to plain-text. This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted - * plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a - * scrape. + * HTML to plain-text. This example program demonstrates the use of jsoup to + * convert HTML input to lightly-formatted plain-text. That is divergent from + * the general goal of jsoup's .text() methods, which is to get clean data from + * a scrape. * <p/> - * Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend. - * + * Note that this is a fairly simplistic formatter -- for real world use you'll + * want to embrace and extend. + * * @author Jonathan Hedley, jonathan@hedley.net */ public class HtmlToPlainText { @@ -36,13 +38,16 @@ public class HtmlToPlainText { /** * Format an Element to plain-text - * @param element the root element to format + * + * @param element + * the root element to format * @return formatted text */ public String getPlainText(Element element) { FormattingVisitor formatter = new FormattingVisitor(); NodeTraversor traversor = new NodeTraversor(formatter); - traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node + traversor.traverse(element); // walk the DOM, and call .head() and + // .tail() for each node return formatter.toString(); } @@ -51,44 +56,57 @@ public class HtmlToPlainText { private class FormattingVisitor implements NodeVisitor { private static final int maxWidth = 80; private int width = 0; - private StringBuilder accum = new StringBuilder(); // holds the accumulated text + private StringBuilder accum = new StringBuilder(); // holds the + // accumulated text // hit when the node is first seen + @Override public void head(Node node, int depth) { String name = node.nodeName(); - if (node instanceof TextNode) - append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. - else if (name.equals("li")) + if (node instanceof TextNode) { + append(((TextNode) node).text()); // TextNodes carry all + // user-readable text in the + // DOM. + } else if (name.equals("li")) { append("\n * "); + } } // hit when all of the node's children (if any) have been visited + @Override public void tail(Node node, int depth) { String name = node.nodeName(); - if (name.equals("br")) + if (name.equals("br")) { append("\n"); - else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5")) + } else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5")) { append("\n\n"); - else if (name.equals("a")) + } else if (name.equals("a")) { append(String.format(" <%s>", node.absUrl("href"))); + } } // appends text to the string builder with a simple word wrap method private void append(String text) { - if (text.startsWith("\n")) - width = 0; // reset counter if starts with a newline. only from formats above, not in natural text - if (text.equals(" ") && - (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) + if (text.startsWith("\n")) { + width = 0; // reset counter if starts with a newline. only from + // formats above, not in natural text + } + if (text.equals(" ") + && (accum.length() == 0 || StringUtil.in( + accum.substring(accum.length() - 1), " ", "\n"))) { return; // don't accumulate long runs of empty spaces + } if (text.length() + width > maxWidth) { // won't fit, needs to wrap String words[] = text.split("\\s+"); for (int i = 0; i < words.length; i++) { String word = words[i]; boolean last = i == words.length - 1; - if (!last) // insert a space if not the last word + if (!last) { word = word + " "; - if (word.length() + width > maxWidth) { // wrap and reset counter + } + if (word.length() + width > maxWidth) { // wrap and reset + // counter accum.append("\n").append(word); width = word.length(); } else { @@ -102,6 +120,7 @@ public class HtmlToPlainText { } } + @Override public String toString() { return accum.toString(); } |