Browse Source

Reformat project

tags/7.0.0.beta1
Leif Åstrand 11 years ago
parent
commit
7d25670284
100 changed files with 5271 additions and 3557 deletions
  1. 1
    1
      client/src/com/vaadin/client/ComponentLocator.java
  2. 0
    1
      client/src/com/vaadin/client/Util.java
  3. 2
    2
      client/src/com/vaadin/client/VDebugConsole.java
  4. 2
    2
      client/src/com/vaadin/client/ui/AbstractComponentConnector.java
  5. 1
    1
      client/src/com/vaadin/client/ui/AbstractComponentContainerConnector.java
  6. 1
    1
      client/src/com/vaadin/client/ui/AbstractConnector.java
  7. 0
    1
      client/src/com/vaadin/client/ui/FocusableScrollPanel.java
  8. 1
    2
      client/src/com/vaadin/client/ui/gridlayout/GridLayoutConnector.java
  9. 1
    2
      client/src/com/vaadin/client/ui/panel/PanelConnector.java
  10. 1
    1
      client/src/com/vaadin/client/ui/panel/VPanel.java
  11. 1
    1
      client/src/com/vaadin/client/ui/popupview/VPopupView.java
  12. 1
    1
      client/src/com/vaadin/client/ui/richtextarea/VRichTextArea.java
  13. 4
    0
      client/src/com/vaadin/client/ui/slider/VSlider.java
  14. 1
    1
      client/src/com/vaadin/client/ui/splitpanel/VAbstractSplitPanel.java
  15. 0
    1
      client/src/com/vaadin/client/ui/splitpanel/VerticalSplitPanelConnector.java
  16. 2
    2
      client/src/com/vaadin/client/ui/ui/VUI.java
  17. 1
    1
      client/src/com/vaadin/client/ui/window/VWindow.java
  18. 0
    2
      client/tests/src/com/vaadin/client/ApplicationConnectionTestURLGeneration.java
  19. 0
    2
      client/tests/src/com/vaadin/client/DateTimeServiceTest.java
  20. 2
    2
      client/tests/src/com/vaadin/client/TestVBrowserDetailsUserAgentParser.java
  21. 3
    3
      server/src/com/vaadin/Application.java
  22. 4
    0
      server/src/com/vaadin/data/util/AbstractProperty.java
  23. 4
    0
      server/src/com/vaadin/data/util/ContainerHierarchicalWrapper.java
  24. 4
    0
      server/src/com/vaadin/data/util/ContainerOrderedWrapper.java
  25. 4
    0
      server/src/com/vaadin/data/util/IndexedContainer.java
  26. 2
    0
      server/src/com/vaadin/data/util/PropertysetItem.java
  27. 2
    0
      server/src/com/vaadin/data/util/sqlcontainer/SQLContainer.java
  28. 2
    0
      server/src/com/vaadin/data/util/sqlcontainer/query/TableQuery.java
  29. 1
    1
      server/src/com/vaadin/external/json/JSONException.java
  30. 1
    1
      server/src/com/vaadin/external/json/JSONStringer.java
  31. 2
    2
      server/src/com/vaadin/server/AbstractUIProvider.java
  32. 3
    3
      server/src/com/vaadin/server/AddonContext.java
  33. 1
    0
      server/src/com/vaadin/server/ApplicationConfiguration.java
  34. 2
    2
      server/src/com/vaadin/server/ApplicationStartedEvent.java
  35. 3
    4
      server/src/com/vaadin/server/ApplicationStartedListener.java
  36. 1
    2
      server/src/com/vaadin/server/CommunicationManager.java
  37. 60
    66
      server/src/com/vaadin/server/CustomizedSystemMessages.java
  38. 2
    1
      server/src/com/vaadin/server/GAEVaadinServlet.java
  39. 3
    4
      server/src/com/vaadin/server/LegacyVaadinPortlet.java
  40. 3
    4
      server/src/com/vaadin/server/LegacyVaadinServlet.java
  41. 1
    2
      server/src/com/vaadin/server/PortletCommunicationManager.java
  42. 0
    1
      server/src/com/vaadin/server/RequestHandler.java
  43. 0
    1
      server/src/com/vaadin/server/RequestTimer.java
  44. 0
    1
      server/src/com/vaadin/server/ServerRpcManager.java
  45. 20
    29
      server/src/com/vaadin/server/SystemMessages.java
  46. 2
    2
      server/src/com/vaadin/server/UIProvider.java
  47. 0
    1
      server/src/com/vaadin/server/UnsupportedBrowserHandler.java
  48. 3
    4
      server/src/com/vaadin/server/VaadinPortlet.java
  49. 5
    4
      server/src/com/vaadin/server/VaadinServlet.java
  50. 0
    1
      server/src/com/vaadin/server/WrappedHttpServletRequest.java
  51. 2
    4
      server/src/com/vaadin/ui/AbstractOrderedLayout.java
  52. 4
    0
      server/src/com/vaadin/ui/AbstractSelect.java
  53. 5
    0
      server/src/com/vaadin/ui/ComboBox.java
  54. 4
    0
      server/src/com/vaadin/ui/CustomField.java
  55. 5
    1
      server/src/com/vaadin/ui/DateField.java
  56. 1
    1
      server/src/com/vaadin/ui/DragAndDropWrapper.java
  57. 1
    1
      server/src/com/vaadin/ui/Embedded.java
  58. 2
    4
      server/src/com/vaadin/ui/Flash.java
  59. 2
    0
      server/src/com/vaadin/ui/Label.java
  60. 1
    1
      server/src/com/vaadin/ui/MenuBar.java
  61. 4
    0
      server/src/com/vaadin/ui/OptionGroup.java
  62. 1
    1
      server/src/com/vaadin/ui/ProgressIndicator.java
  63. 1
    1
      server/src/com/vaadin/ui/RichTextArea.java
  64. 4
    0
      server/src/com/vaadin/ui/TabSheet.java
  65. 2
    0
      server/src/com/vaadin/ui/Table.java
  66. 2
    0
      server/src/com/vaadin/ui/Tree.java
  67. 2
    2
      server/src/com/vaadin/ui/UI.java
  68. 1
    1
      server/src/com/vaadin/ui/Upload.java
  69. 216
    90
      server/src/org/jsoup/Connection.java
  70. 194
    130
      server/src/org/jsoup/Jsoup.java
  71. 41
    22
      server/src/org/jsoup/examples/HtmlToPlainText.java
  72. 16
    12
      server/src/org/jsoup/examples/ListLinks.java
  73. 100
    49
      server/src/org/jsoup/helper/DataUtil.java
  74. 21
    4
      server/src/org/jsoup/helper/DescendableLinkedList.java
  75. 258
    102
      server/src/org/jsoup/helper/HttpConnection.java
  76. 53
    26
      server/src/org/jsoup/helper/StringUtil.java
  77. 67
    29
      server/src/org/jsoup/helper/Validate.java
  78. 73
    37
      server/src/org/jsoup/nodes/Attribute.java
  79. 130
    64
      server/src/org/jsoup/nodes/Attributes.java
  80. 25
    15
      server/src/org/jsoup/nodes/Comment.java
  81. 37
    17
      server/src/org/jsoup/nodes/DataNode.java
  82. 122
    70
      server/src/org/jsoup/nodes/Document.java
  83. 19
    9
      server/src/org/jsoup/nodes/DocumentType.java
  84. 513
    277
      server/src/org/jsoup/nodes/Element.java
  85. 72
    39
      server/src/org/jsoup/nodes/Entities.java
  86. 257
    145
      server/src/org/jsoup/nodes/Node.java
  87. 62
    31
      server/src/org/jsoup/nodes/TextNode.java
  88. 31
    18
      server/src/org/jsoup/nodes/XmlDeclaration.java
  89. 34
    20
      server/src/org/jsoup/parser/CharacterReader.java
  90. 184
    102
      server/src/org/jsoup/parser/HtmlTreeBuilder.java
  91. 917
    728
      server/src/org/jsoup/parser/HtmlTreeBuilderState.java
  92. 5
    2
      server/src/org/jsoup/parser/ParseError.java
  93. 4
    4
      server/src/org/jsoup/parser/ParseErrorList.java
  94. 82
    41
      server/src/org/jsoup/parser/Parser.java
  95. 92
    56
      server/src/org/jsoup/parser/Tag.java
  96. 19
    18
      server/src/org/jsoup/parser/Token.java
  97. 177
    97
      server/src/org/jsoup/parser/TokenQueue.java
  98. 69
    35
      server/src/org/jsoup/parser/Tokeniser.java
  99. 1177
    1085
      server/src/org/jsoup/parser/TokeniserState.java
  100. 0
    0
      server/src/org/jsoup/parser/TreeBuilder.java

+ 1
- 1
client/src/com/vaadin/client/ComponentLocator.java View File

@@ -596,7 +596,7 @@ public class ComponentLocator {

Widget child = iterator.next();
String simpleName2 = Util.getSimpleName(child);
if (widgetClassName.equals(simpleName2)) {
if (widgetPosition == 0) {
w = child;

+ 0
- 1
client/src/com/vaadin/client/Util.java View File

@@ -534,7 +534,6 @@ public class Util {

}


@Deprecated
public static boolean isCached(UIDL uidl) {
return uidl.getBooleanAttribute("cached");

+ 2
- 2
client/src/com/vaadin/client/VDebugConsole.java View File

@@ -924,8 +924,8 @@ public class VDebugConsole extends VOverlay implements Console {
protected void dumpConnectorInfo(ApplicationConnection a) {
UIConnector root = a.getRootConnector();
log("================");
log("Connector hierarchy for Root: " + root.getState().caption
+ " (" + root.getConnectorId() + ")");
log("Connector hierarchy for Root: " + root.getState().caption + " ("
+ root.getConnectorId() + ")");
Set<ServerConnector> connectorsInHierarchy = new HashSet<ServerConnector>();
SimpleTree rootHierachy = dumpConnectorHierarchy(root, "",
connectorsInHierarchy);

+ 2
- 2
client/src/com/vaadin/client/ui/AbstractComponentConnector.java View File

@@ -47,7 +47,7 @@ import com.vaadin.shared.ui.TabIndexState;
import com.vaadin.ui.themes.BaseTheme;

public abstract class AbstractComponentConnector extends AbstractConnector
implements ComponentConnector {
implements ComponentConnector {

private Widget widget;

@@ -93,7 +93,7 @@ implements ComponentConnector {
"There is no information about the widget for "
+ Util.getSimpleName(this)
+ ". Did you remember to compile the right widgetset?",
e);
e);
}
}


+ 1
- 1
client/src/com/vaadin/client/ui/AbstractComponentContainerConnector.java View File

@@ -22,9 +22,9 @@ import com.google.gwt.event.shared.HandlerRegistration;
import com.vaadin.client.ComponentConnector;
import com.vaadin.client.ComponentContainerConnector;
import com.vaadin.client.ConnectorHierarchyChangeEvent;
import com.vaadin.client.ConnectorHierarchyChangeEvent.ConnectorHierarchyChangeHandler;
import com.vaadin.client.Util;
import com.vaadin.client.VConsole;
import com.vaadin.client.ConnectorHierarchyChangeEvent.ConnectorHierarchyChangeHandler;

public abstract class AbstractComponentContainerConnector extends
AbstractComponentConnector implements ComponentContainerConnector,

+ 1
- 1
client/src/com/vaadin/client/ui/AbstractConnector.java View File

@@ -46,7 +46,7 @@ import com.vaadin.shared.communication.URLReference;
*
*/
public abstract class AbstractConnector implements ServerConnector,
StateChangeHandler {
StateChangeHandler {

private ApplicationConnection connection;
private String id;

+ 0
- 1
client/src/com/vaadin/client/ui/FocusableScrollPanel.java View File

@@ -22,7 +22,6 @@ import com.google.gwt.core.client.Scheduler.ScheduledCommand;
import com.google.gwt.dom.client.DivElement;
import com.google.gwt.dom.client.Document;
import com.google.gwt.dom.client.Style;
import com.google.gwt.dom.client.Style.Overflow;
import com.google.gwt.dom.client.Style.Position;
import com.google.gwt.dom.client.Style.Unit;
import com.google.gwt.event.dom.client.HasScrollHandlers;

+ 1
- 2
client/src/com/vaadin/client/ui/gridlayout/GridLayoutConnector.java View File

@@ -163,8 +163,7 @@ public class GridLayoutConnector extends AbstractComponentContainerConnector
layout.colExpandRatioArray = uidl.getIntArrayAttribute("colExpand");
layout.rowExpandRatioArray = uidl.getIntArrayAttribute("rowExpand");

layout.updateMarginStyleNames(new MarginInfo(getState()
.marginsBitmask));
layout.updateMarginStyleNames(new MarginInfo(getState().marginsBitmask));

layout.updateSpacingStyleName(getState().spacing);


+ 1
- 2
client/src/com/vaadin/client/ui/panel/PanelConnector.java View File

@@ -146,8 +146,7 @@ public class PanelConnector extends AbstractComponentContainerConnector
getWidget().setIconUri(null, client);
}

getWidget().setErrorIndicatorVisible(
null != getState().errorMessage);
getWidget().setErrorIndicatorVisible(null != getState().errorMessage);

// We may have actions attached to this panel
if (uidl.getChildCount() > 0) {

+ 1
- 1
client/src/com/vaadin/client/ui/panel/VPanel.java View File

@@ -26,8 +26,8 @@ import com.vaadin.client.ApplicationConnection;
import com.vaadin.client.Focusable;
import com.vaadin.client.ui.Icon;
import com.vaadin.client.ui.ShortcutActionHandler;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.TouchScrollDelegate.TouchScrollHandler;

public class VPanel extends SimplePanel implements ShortcutActionHandlerOwner,

+ 1
- 1
client/src/com/vaadin/client/ui/popupview/VPopupView.java View File

@@ -42,8 +42,8 @@ import com.vaadin.client.UIDL;
import com.vaadin.client.VCaptionWrapper;
import com.vaadin.client.VConsole;
import com.vaadin.client.ui.ShortcutActionHandler;
import com.vaadin.client.ui.VOverlay;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.VOverlay;
import com.vaadin.client.ui.richtextarea.VRichTextArea;

public class VPopupView extends HTML {

+ 1
- 1
client/src/com/vaadin/client/ui/richtextarea/VRichTextArea.java View File

@@ -42,8 +42,8 @@ import com.vaadin.client.ConnectorMap;
import com.vaadin.client.Util;
import com.vaadin.client.ui.Field;
import com.vaadin.client.ui.ShortcutActionHandler;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.TouchScrollDelegate;

/**
* This class implements a basic client side rich text editor component.

+ 4
- 0
client/src/com/vaadin/client/ui/slider/VSlider.java View File

@@ -558,15 +558,18 @@ public class VSlider extends SimpleFocusablePanel implements Field,
this.resolution = resolution;
}

@Override
public HandlerRegistration addValueChangeHandler(
ValueChangeHandler<Double> handler) {
return addHandler(handler, ValueChangeEvent.getType());
}

@Override
public Double getValue() {
return value;
}

@Override
public void setValue(Double value) {
if (value < min) {
value = min;
@@ -613,6 +616,7 @@ public class VSlider extends SimpleFocusablePanel implements Field,
setFeedbackValue(v);
}

@Override
public void setValue(Double value, boolean fireEvents) {
if (value == null) {
return;

+ 1
- 1
client/src/com/vaadin/client/ui/splitpanel/VAbstractSplitPanel.java View File

@@ -44,8 +44,8 @@ import com.vaadin.client.LayoutManager;
import com.vaadin.client.Util;
import com.vaadin.client.VConsole;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.VOverlay;
import com.vaadin.client.ui.TouchScrollDelegate.TouchScrollHandler;
import com.vaadin.client.ui.VOverlay;
import com.vaadin.client.ui.splitpanel.VAbstractSplitPanel.SplitterMoveHandler.SplitterMoveEvent;

public class VAbstractSplitPanel extends ComplexPanel {

+ 0
- 1
client/src/com/vaadin/client/ui/splitpanel/VerticalSplitPanelConnector.java View File

@@ -15,7 +15,6 @@
*/
package com.vaadin.client.ui.splitpanel;

import com.google.gwt.core.client.GWT;
import com.vaadin.shared.ui.Connect;
import com.vaadin.shared.ui.Connect.LoadStyle;
import com.vaadin.ui.VerticalSplitPanel;

+ 2
- 2
client/src/com/vaadin/client/ui/ui/VUI.java View File

@@ -39,10 +39,10 @@ import com.vaadin.client.ConnectorMap;
import com.vaadin.client.Focusable;
import com.vaadin.client.VConsole;
import com.vaadin.client.ui.ShortcutActionHandler;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.VLazyExecutor;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.TouchScrollDelegate;
import com.vaadin.client.ui.TouchScrollDelegate.TouchScrollHandler;
import com.vaadin.client.ui.VLazyExecutor;
import com.vaadin.client.ui.textfield.VTextField;
import com.vaadin.shared.ApplicationConstants;
import com.vaadin.shared.ui.ui.UIConstants;

+ 1
- 1
client/src/com/vaadin/client/ui/window/VWindow.java View File

@@ -48,9 +48,9 @@ import com.vaadin.client.LayoutManager;
import com.vaadin.client.Util;
import com.vaadin.client.ui.FocusableScrollPanel;
import com.vaadin.client.ui.ShortcutActionHandler;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.VLazyExecutor;
import com.vaadin.client.ui.VOverlay;
import com.vaadin.client.ui.ShortcutActionHandler.ShortcutActionHandlerOwner;
import com.vaadin.client.ui.notification.VNotification;
import com.vaadin.shared.EventId;


+ 0
- 2
client/tests/src/com/vaadin/client/ApplicationConnectionTestURLGeneration.java View File

@@ -4,8 +4,6 @@ import static org.junit.Assert.assertEquals;

import org.junit.Test;

import com.vaadin.client.ApplicationConnection;

public class ApplicationConnectionTestURLGeneration {

private static final String[] URIS = new String[] {

+ 0
- 2
client/tests/src/com/vaadin/client/DateTimeServiceTest.java View File

@@ -7,8 +7,6 @@ import java.util.Map;

import junit.framework.TestCase;

import com.vaadin.client.DateTimeService;

public class DateTimeServiceTest extends TestCase {

final long MILLISECONDS_PER_DAY = 24 * 3600 * 1000;

+ 2
- 2
client/tests/src/com/vaadin/client/TestVBrowserDetailsUserAgentParser.java View File

@@ -1,9 +1,9 @@
package com.vaadin.client;

import com.vaadin.shared.VBrowserDetails;

import junit.framework.TestCase;

import com.vaadin.shared.VBrowserDetails;

public class TestVBrowserDetailsUserAgentParser extends TestCase {

private static final String FIREFOX30_WINDOWS = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.0.6) Gecko/2009011913 Firefox/3.0.6";

+ 3
- 3
server/src/com/vaadin/Application.java View File

@@ -26,9 +26,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.vaadin.server.AbstractUIProvider;
import com.vaadin.server.VaadinSession;
import com.vaadin.server.Terminal.ErrorEvent;
import com.vaadin.server.Terminal.ErrorListener;
import com.vaadin.server.VaadinSession;
import com.vaadin.server.WrappedRequest;
import com.vaadin.ui.UI;

@@ -100,8 +100,8 @@ public abstract class Application extends AbstractUIProvider implements
}

@Override
public UI createInstance(VaadinSession application, Class<? extends UI> type,
WrappedRequest request) {
public UI createInstance(VaadinSession application,
Class<? extends UI> type, WrappedRequest request) {
return getUIInstance(request);
}


+ 4
- 0
server/src/com/vaadin/data/util/AbstractProperty.java View File

@@ -133,6 +133,7 @@ public abstract class AbstractProperty<T> implements Property<T>,
* @deprecated Since 7.0, replaced by
* {@link #addReadOnlyStatusChangeListener(com.vaadin.data.Property.ReadOnlyStatusChangeListener)}
**/
@Override
@Deprecated
public void addListener(Property.ReadOnlyStatusChangeListener listener) {
addReadOnlyStatusChangeListener(listener);
@@ -156,6 +157,7 @@ public abstract class AbstractProperty<T> implements Property<T>,
* @deprecated Since 7.0, replaced by
* {@link #removeReadOnlyStatusChangeListener(com.vaadin.data.Property.ReadOnlyStatusChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Property.ReadOnlyStatusChangeListener listener) {
removeReadOnlyStatusChangeListener(listener);
@@ -218,6 +220,7 @@ public abstract class AbstractProperty<T> implements Property<T>,
* @deprecated Since 7.0, replaced by
* {@link #addValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void addListener(ValueChangeListener listener) {
addValueChangeListener(listener);
@@ -235,6 +238,7 @@ public abstract class AbstractProperty<T> implements Property<T>,
* @deprecated Since 7.0, replaced by
* {@link #removeValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void removeListener(ValueChangeListener listener) {
removeValueChangeListener(listener);

+ 4
- 0
server/src/com/vaadin/data/util/ContainerHierarchicalWrapper.java View File

@@ -722,6 +722,7 @@ public class ContainerHierarchicalWrapper implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #addItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.ItemSetChangeListener listener) {
addItemSetChangeListener(listener);
@@ -745,6 +746,7 @@ public class ContainerHierarchicalWrapper implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #removeItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.ItemSetChangeListener listener) {
removeItemSetChangeListener(listener);
@@ -769,6 +771,7 @@ public class ContainerHierarchicalWrapper implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #addPropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.PropertySetChangeListener listener) {
addPropertySetChangeListener(listener);
@@ -793,6 +796,7 @@ public class ContainerHierarchicalWrapper implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #removePropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.PropertySetChangeListener listener) {
removePropertySetChangeListener(listener);

+ 4
- 0
server/src/com/vaadin/data/util/ContainerOrderedWrapper.java View File

@@ -522,6 +522,7 @@ public class ContainerOrderedWrapper implements Container.Ordered,
* @deprecated Since 7.0, replaced by
* {@link #addItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.ItemSetChangeListener listener) {
addItemSetChangeListener(listener);
@@ -545,6 +546,7 @@ public class ContainerOrderedWrapper implements Container.Ordered,
* @deprecated Since 7.0, replaced by
* {@link #removeItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.ItemSetChangeListener listener) {
removeItemSetChangeListener(listener);
@@ -569,6 +571,7 @@ public class ContainerOrderedWrapper implements Container.Ordered,
* @deprecated Since 7.0, replaced by
* {@link #addPropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.PropertySetChangeListener listener) {
addPropertySetChangeListener(listener);
@@ -593,6 +596,7 @@ public class ContainerOrderedWrapper implements Container.Ordered,
* @deprecated Since 7.0, replaced by
* {@link #removePropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.PropertySetChangeListener listener) {
removePropertySetChangeListener(listener);

+ 4
- 0
server/src/com/vaadin/data/util/IndexedContainer.java View File

@@ -532,6 +532,7 @@ public class IndexedContainer extends
* @deprecated Since 7.0, replaced by
* {@link #addValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void addListener(Property.ValueChangeListener listener) {
addValueChangeListener(listener);
@@ -554,6 +555,7 @@ public class IndexedContainer extends
* @deprecated Since 7.0, replaced by
* {@link #removeValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Property.ValueChangeListener listener) {
removeValueChangeListener(listener);
@@ -1013,6 +1015,7 @@ public class IndexedContainer extends
* @deprecated Since 7.0, replaced by
* {@link #addValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void addListener(Property.ValueChangeListener listener) {
addValueChangeListener(listener);
@@ -1034,6 +1037,7 @@ public class IndexedContainer extends
* @deprecated Since 7.0, replaced by
* {@link #removeValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Property.ValueChangeListener listener) {
removeValueChangeListener(listener);

+ 2
- 0
server/src/com/vaadin/data/util/PropertysetItem.java View File

@@ -212,6 +212,7 @@ public class PropertysetItem implements Item, Item.PropertySetChangeNotifier,
* @deprecated Since 7.0, replaced by
* {@link #addPropertySetChangeListener(com.vaadin.data.Item.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Item.PropertySetChangeListener listener) {
addPropertySetChangeListener(listener);
@@ -235,6 +236,7 @@ public class PropertysetItem implements Item, Item.PropertySetChangeNotifier,
* @deprecated Since 7.0, replaced by
* {@link #removePropertySetChangeListener(com.vaadin.data.Item.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Item.PropertySetChangeListener listener) {
removePropertySetChangeListener(listener);

+ 2
- 0
server/src/com/vaadin/data/util/sqlcontainer/SQLContainer.java View File

@@ -1523,6 +1523,7 @@ public class SQLContainer implements Container, Container.Filterable,
* @deprecated Since 7.0, replaced by
* {@link #addItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.ItemSetChangeListener listener) {
addItemSetChangeListener(listener);
@@ -1548,6 +1549,7 @@ public class SQLContainer implements Container, Container.Filterable,
* @deprecated Since 7.0, replaced by
* {@link #removeItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.ItemSetChangeListener listener) {
removeItemSetChangeListener(listener);

+ 2
- 0
server/src/com/vaadin/data/util/sqlcontainer/query/TableQuery.java View File

@@ -715,6 +715,7 @@ public class TableQuery implements QueryDelegate,
* @deprecated Since 7.0, replaced by
* {@link #addRowIdChangeListener(com.vaadin.data.util.sqlcontainer.query.QueryDelegate.RowIdChangeListener)}
**/
@Override
@Deprecated
public void addListener(RowIdChangeListener listener) {
addRowIdChangeListener(listener);
@@ -734,6 +735,7 @@ public class TableQuery implements QueryDelegate,
* @deprecated Since 7.0, replaced by
* {@link #removeRowIdChangeListener(com.vaadin.data.util.sqlcontainer.query.QueryDelegate.RowIdChangeListener)}
**/
@Override
@Deprecated
public void removeListener(RowIdChangeListener listener) {
removeRowIdChangeListener(listener);

+ 1
- 1
server/src/com/vaadin/external/json/JSONException.java View File

@@ -27,6 +27,6 @@ public class JSONException extends Exception {

@Override
public Throwable getCause() {
return this.cause;
return cause;
}
}

+ 1
- 1
server/src/com/vaadin/external/json/JSONStringer.java View File

@@ -79,6 +79,6 @@ public class JSONStringer extends JSONWriter {
*/
@Override
public String toString() {
return this.mode == 'd' ? this.writer.toString() : null;
return mode == 'd' ? writer.toString() : null;
}
}

+ 2
- 2
server/src/com/vaadin/server/AbstractUIProvider.java View File

@@ -27,8 +27,8 @@ import com.vaadin.ui.UI;
public abstract class AbstractUIProvider implements UIProvider {

@Override
public UI createInstance(VaadinSession application, Class<? extends UI> type,
WrappedRequest request) {
public UI createInstance(VaadinSession application,
Class<? extends UI> type, WrappedRequest request) {
try {
return type.newInstance();
} catch (InstantiationException e) {

+ 3
- 3
server/src/com/vaadin/server/AddonContext.java View File

@@ -143,9 +143,9 @@ public class AddonContext {
}

/**
* Adds a listener that will be notified any time a new {@link VaadinSession}
* instance is started or more precisely directly after
* {@link VaadinSession#init()} has been invoked.
* Adds a listener that will be notified any time a new
* {@link VaadinSession} instance is started or more precisely directly
* after {@link VaadinSession#init()} has been invoked.
*
* @param applicationStartListener
* the application start listener that should be added

+ 1
- 0
server/src/com/vaadin/server/ApplicationConfiguration.java View File

@@ -18,6 +18,7 @@ package com.vaadin.server;

import java.util.Properties;

import org.apache.catalina.core.ApplicationContext;

/**
* A collection of properties configured for all applications as well as a way

+ 2
- 2
server/src/com/vaadin/server/ApplicationStartedEvent.java View File

@@ -18,7 +18,6 @@ package com.vaadin.server;

import java.util.EventObject;


/**
* Event used by
* {@link ApplicationStartedListener#applicationStarted(ApplicationStartedEvent)}
@@ -38,7 +37,8 @@ public class ApplicationStartedEvent extends EventObject {
* @param application
* the application that has been started
*/
public ApplicationStartedEvent(AddonContext context, VaadinSession application) {
public ApplicationStartedEvent(AddonContext context,
VaadinSession application) {
super(context);
this.application = application;
}

+ 3
- 4
server/src/com/vaadin/server/ApplicationStartedListener.java View File

@@ -18,11 +18,10 @@ package com.vaadin.server;

import java.util.EventListener;


/**
* Listener that gets notified when a new {@link VaadinSession} has been started.
* Add-ons can use this listener to automatically integrate with API tied to the
* Application API.
* Listener that gets notified when a new {@link VaadinSession} has been
* started. Add-ons can use this listener to automatically integrate with API
* tied to the Application API.
*
* @see AddonContext#addApplicationStartedListener(ApplicationStartedListener)
*

+ 1
- 2
server/src/com/vaadin/server/CommunicationManager.java View File

@@ -111,8 +111,7 @@ public class CommunicationManager extends AbstractCommunicationManager {
@Override
protected InputStream getThemeResourceAsStream(UI uI, String themeName,
String resource) {
VaadinServletSession context = (VaadinServletSession) uI
.getSession();
VaadinServletSession context = (VaadinServletSession) uI.getSession();
ServletContext servletContext = context.getHttpSession()
.getServletContext();
return servletContext.getResourceAsStream("/"

+ 60
- 66
server/src/com/vaadin/server/CustomizedSystemMessages.java View File

@@ -19,45 +19,44 @@ package com.vaadin.server;
import java.io.Serializable;

/**
* Contains the system messages used to notify the user about various
* critical situations that can occur.
* Contains the system messages used to notify the user about various critical
* situations that can occur.
* <p>
* Vaadin gets the SystemMessages from your application by calling a static
* getSystemMessages() method. By default the
* Application.getSystemMessages() is used. You can customize this by
* defining a static MyApplication.getSystemMessages() and returning
* CustomizedSystemMessages. Note that getSystemMessages() is static -
* changing the system messages will by default change the message for all
* users of the application.
* getSystemMessages() method. By default the Application.getSystemMessages() is
* used. You can customize this by defining a static
* MyApplication.getSystemMessages() and returning CustomizedSystemMessages.
* Note that getSystemMessages() is static - changing the system messages will
* by default change the message for all users of the application.
* </p>
* <p>
* The default behavior is to show a notification, and restart the
* application when the user clicks the message. <br/>
* Instead of restarting the application, you can set a specific URL that
* the user is taken to.<br/>
* Setting both caption and message to null will restart the application (or
* go to the specified URL) without displaying a notification.
* The default behavior is to show a notification, and restart the application
* when the user clicks the message. <br/>
* Instead of restarting the application, you can set a specific URL that the
* user is taken to.<br/>
* Setting both caption and message to null will restart the application (or go
* to the specified URL) without displaying a notification.
* set*NotificationEnabled(false) will achieve the same thing.
* </p>
* <p>
* The situations are:
* <li>Session expired: the user session has expired, usually due to
* inactivity.</li>
* <li>Session expired: the user session has expired, usually due to inactivity.
* </li>
* <li>Communication error: the client failed to contact the server, or the
* server returned an invalid response.</li>
* <li>Internal error: unhandled critical server error (e.g. out of memory,
* database crash)</li>
* <li>Out of sync: the client is not in sync with the server. E.g the user
* opens two windows showing the same application, but the application does
* not support this and uses the same Window instance. When the user makes
* changes in one of the windows - the other window is no longer in sync,
* and (for instance) pressing a button that is no longer present in the UI
* will cause an out-of-sync situation.
* opens two windows showing the same application, but the application does not
* support this and uses the same Window instance. When the user makes changes
* in one of the windows - the other window is no longer in sync, and (for
* instance) pressing a button that is no longer present in the UI will cause an
* out-of-sync situation.
* </p>
*/

public class CustomizedSystemMessages extends SystemMessages
implements Serializable {
public class CustomizedSystemMessages extends SystemMessages implements
Serializable {

/**
* Sets the URL to go to when the session has expired.
@@ -83,10 +82,10 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the notification. Set to null for no caption. If
* both caption and message are null, client automatically forwards to
* sessionExpiredUrl after timeout timer expires. Timer uses value read
* from HTTPSession.getMaxInactiveInterval()
* Sets the caption of the notification. Set to null for no caption. If both
* caption and message are null, client automatically forwards to
* sessionExpiredUrl after timeout timer expires. Timer uses value read from
* HTTPSession.getMaxInactiveInterval()
*
* @param sessionExpiredCaption
* the caption
@@ -96,10 +95,10 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the message of the notification. Set to null for no message. If
* both caption and message are null, client automatically forwards to
* sessionExpiredUrl after timeout timer expires. Timer uses value read
* from HTTPSession.getMaxInactiveInterval()
* Sets the message of the notification. Set to null for no message. If both
* caption and message are null, client automatically forwards to
* sessionExpiredUrl after timeout timer expires. Timer uses value read from
* HTTPSession.getMaxInactiveInterval()
*
* @param sessionExpiredMessage
* the message
@@ -131,26 +130,24 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the notification. Set to null for no caption. If
* both caption and message is null, the notification is disabled;
* Sets the caption of the notification. Set to null for no caption. If both
* caption and message is null, the notification is disabled;
*
* @param authenticationErrorCaption
* the caption
*/
public void setAuthenticationErrorCaption(
String authenticationErrorCaption) {
public void setAuthenticationErrorCaption(String authenticationErrorCaption) {
this.authenticationErrorCaption = authenticationErrorCaption;
}

/**
* Sets the message of the notification. Set to null for no message. If
* both caption and message is null, the notification is disabled;
* Sets the message of the notification. Set to null for no message. If both
* caption and message is null, the notification is disabled;
*
* @param authenticationErrorMessage
* the message
*/
public void setAuthenticationErrorMessage(
String authenticationErrorMessage) {
public void setAuthenticationErrorMessage(String authenticationErrorMessage) {
this.authenticationErrorMessage = authenticationErrorMessage;
}

@@ -177,26 +174,24 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the notification. Set to null for no caption. If
* both caption and message is null, the notification is disabled;
* Sets the caption of the notification. Set to null for no caption. If both
* caption and message is null, the notification is disabled;
*
* @param communicationErrorCaption
* the caption
*/
public void setCommunicationErrorCaption(
String communicationErrorCaption) {
public void setCommunicationErrorCaption(String communicationErrorCaption) {
this.communicationErrorCaption = communicationErrorCaption;
}

/**
* Sets the message of the notification. Set to null for no message. If
* both caption and message is null, the notification is disabled;
* Sets the message of the notification. Set to null for no message. If both
* caption and message is null, the notification is disabled;
*
* @param communicationErrorMessage
* the message
*/
public void setCommunicationErrorMessage(
String communicationErrorMessage) {
public void setCommunicationErrorMessage(String communicationErrorMessage) {
this.communicationErrorMessage = communicationErrorMessage;
}

@@ -223,8 +218,8 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the notification. Set to null for no caption. If
* both caption and message is null, the notification is disabled;
* Sets the caption of the notification. Set to null for no caption. If both
* caption and message is null, the notification is disabled;
*
* @param internalErrorCaption
* the caption
@@ -234,8 +229,8 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the message of the notification. Set to null for no message. If
* both caption and message is null, the notification is disabled;
* Sets the message of the notification. Set to null for no message. If both
* caption and message is null, the notification is disabled;
*
* @param internalErrorMessage
* the message
@@ -267,8 +262,8 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the notification. Set to null for no caption. If
* both caption and message is null, the notification is disabled;
* Sets the caption of the notification. Set to null for no caption. If both
* caption and message is null, the notification is disabled;
*
* @param outOfSyncCaption
* the caption
@@ -278,8 +273,8 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the message of the notification. Set to null for no message. If
* both caption and message is null, the notification is disabled;
* Sets the message of the notification. Set to null for no message. If both
* caption and message is null, the notification is disabled;
*
* @param outOfSyncMessage
* the message
@@ -299,13 +294,12 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Enables or disables the notification for "cookies disabled" messages.
* If disabled, the URL returned by {@link #getCookiesDisabledURL()} is
* loaded directly.
* Enables or disables the notification for "cookies disabled" messages. If
* disabled, the URL returned by {@link #getCookiesDisabledURL()} is loaded
* directly.
*
* @param cookiesDisabledNotificationEnabled
* true to enable "cookies disabled" messages, false
* otherwise
* true to enable "cookies disabled" messages, false otherwise
*/
public void setCookiesDisabledNotificationEnabled(
boolean cookiesDisabledNotificationEnabled) {
@@ -313,9 +307,9 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the caption of the "cookies disabled" notification. Set to null
* for no caption. If both caption and message is null, the notification
* is disabled.
* Sets the caption of the "cookies disabled" notification. Set to null for
* no caption. If both caption and message is null, the notification is
* disabled.
*
* @param cookiesDisabledCaption
* the caption for the "cookies disabled" notification
@@ -325,9 +319,9 @@ public class CustomizedSystemMessages extends SystemMessages
}

/**
* Sets the message of the "cookies disabled" notification. Set to null
* for no message. If both caption and message is null, the notification
* is disabled.
* Sets the message of the "cookies disabled" notification. Set to null for
* no message. If both caption and message is null, the notification is
* disabled.
*
* @param cookiesDisabledMessage
* the message for the "cookies disabled" notification

+ 2
- 1
server/src/com/vaadin/server/GAEVaadinServlet.java View File

@@ -320,7 +320,8 @@ public class GAEVaadinServlet extends VaadinServlet {
ObjectInputStream ois;
try {
ois = new ObjectInputStream(bais);
VaadinSession applicationContext = (VaadinSession) ois.readObject();
VaadinSession applicationContext = (VaadinSession) ois
.readObject();
applicationContext.storeInSession(new WrappedHttpSession(
session));
} catch (IOException e) {

+ 3
- 4
server/src/com/vaadin/server/LegacyVaadinPortlet.java View File

@@ -45,10 +45,9 @@ public class LegacyVaadinPortlet extends VaadinPortlet {
}

@Override
protected VaadinPortletSession createApplication(
PortletRequest request) throws PortletException {
VaadinPortletSession application = super
.createApplication(request);
protected VaadinPortletSession createApplication(PortletRequest request)
throws PortletException {
VaadinPortletSession application = super.createApplication(request);

// Must set current before running init()
VaadinSession.setCurrent(application);

+ 3
- 4
server/src/com/vaadin/server/LegacyVaadinServlet.java View File

@@ -45,10 +45,9 @@ public class LegacyVaadinServlet extends VaadinServlet {
}

@Override
protected VaadinServletSession createApplication(
HttpServletRequest request) throws ServletException {
VaadinServletSession application = super
.createApplication(request);
protected VaadinServletSession createApplication(HttpServletRequest request)
throws ServletException {
VaadinServletSession application = super.createApplication(request);

// Must set current before running init()
VaadinSession.setCurrent(application);

+ 1
- 2
server/src/com/vaadin/server/PortletCommunicationManager.java View File

@@ -156,8 +156,7 @@ public class PortletCommunicationManager extends AbstractCommunicationManager {
@Override
protected InputStream getThemeResourceAsStream(UI uI, String themeName,
String resource) {
VaadinPortletSession context = (VaadinPortletSession) uI
.getSession();
VaadinPortletSession context = (VaadinPortletSession) uI.getSession();
PortletContext portletContext = context.getPortletSession()
.getPortletContext();
return portletContext.getResourceAsStream("/"

+ 0
- 1
server/src/com/vaadin/server/RequestHandler.java View File

@@ -19,7 +19,6 @@ package com.vaadin.server;
import java.io.IOException;
import java.io.Serializable;


/**
* Handler for producing a response to non-UIDL requests. Handlers can be added
* to applications using {@link VaadinSession#addRequestHandler(RequestHandler)}

+ 0
- 1
server/src/com/vaadin/server/RequestTimer.java View File

@@ -18,7 +18,6 @@ package com.vaadin.server;

import java.io.Serializable;


/**
* Times the handling of requests and stores the information as an attribute in
* the request. The timing info is later passed on to the client in the UIDL and

+ 0
- 1
server/src/com/vaadin/server/ServerRpcManager.java View File

@@ -17,7 +17,6 @@
package com.vaadin.server;

import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Map;

+ 20
- 29
server/src/com/vaadin/server/SystemMessages.java View File

@@ -18,15 +18,12 @@ package com.vaadin.server;

import java.io.Serializable;



/**
* Contains the system messages used to notify the user about various
* critical situations that can occur.
* Contains the system messages used to notify the user about various critical
* situations that can occur.
* <p>
* Customize by overriding the static
* {@link VaadinSession#getSystemMessages()} and returning
* {@link CustomizedSystemMessages}.
* Customize by overriding the static {@link VaadinSession#getSystemMessages()}
* and returning {@link CustomizedSystemMessages}.
* </p>
* <p>
* The defaults defined in this class are:
@@ -49,16 +46,15 @@ import java.io.Serializable;
* <li><b>outOfSyncURL</b> = null</li>
* <li><b>outOfSyncNotificationEnabled</b> = true</li>
* <li><b>outOfSyncCaption</b> = "Out of sync"</li>
* <li><b>outOfSyncMessage</b> = "Something has caused us to be out of sync
* with the server.<br/>
* <li><b>outOfSyncMessage</b> = "Something has caused us to be out of sync with
* the server.<br/>
* Take note of any unsaved data, and <u>click here</u> to re-sync."</li>
* <li><b>cookiesDisabledURL</b> = null</li>
* <li><b>cookiesDisabledNotificationEnabled</b> = true</li>
* <li><b>cookiesDisabledCaption</b> = "Cookies disabled"</li>
* <li><b>cookiesDisabledMessage</b> = "This application requires cookies to
* function.<br/>
* Please enable cookies in your browser and <u>click here</u> to try again.
* </li>
* Please enable cookies in your browser and <u>click here</u> to try again.</li>
* </ul>
* </p>
*
@@ -134,8 +130,7 @@ public class SystemMessages implements Serializable {
}

/**
* @return null to reload the application after communication error
* message.
* @return null to reload the application after communication error message.
*/
public String getCommunicationErrorURL() {
return communicationErrorURL;
@@ -198,8 +193,8 @@ public class SystemMessages implements Serializable {
}

/**
* @return null to reload the current URL after internal error message
* has been shown.
* @return null to reload the current URL after internal error message has
* been shown.
*/
public String getInternalErrorURL() {
return internalErrorURL;
@@ -216,8 +211,7 @@ public class SystemMessages implements Serializable {
* @return "Internal error"
*/
public String getInternalErrorCaption() {
return (internalErrorNotificationEnabled ? internalErrorCaption
: null);
return (internalErrorNotificationEnabled ? internalErrorCaption : null);
}

/**
@@ -226,8 +220,7 @@ public class SystemMessages implements Serializable {
* continue."
*/
public String getInternalErrorMessage() {
return (internalErrorNotificationEnabled ? internalErrorMessage
: null);
return (internalErrorNotificationEnabled ? internalErrorMessage : null);
}

/**
@@ -253,8 +246,7 @@ public class SystemMessages implements Serializable {

/**
* @return "Something has caused us to be out of sync with the server.<br/>
* Take note of any unsaved data, and <u>click here</u> to
* re-sync."
* Take note of any unsaved data, and <u>click here</u> to re-sync."
*/
public String getOutOfSyncMessage() {
return (outOfSyncNotificationEnabled ? outOfSyncMessage : null);
@@ -272,13 +264,12 @@ public class SystemMessages implements Serializable {
}

/**
* Determines if "cookies disabled" messages should be shown to the end
* user or not. If the notification is disabled the user will be
* immediately redirected to the URL returned by
* {@link #getCookiesDisabledURL()}.
* Determines if "cookies disabled" messages should be shown to the end user
* or not. If the notification is disabled the user will be immediately
* redirected to the URL returned by {@link #getCookiesDisabledURL()}.
*
* @return true to show "cookies disabled" messages to the end user,
* false to redirect to the given URL directly
* @return true to show "cookies disabled" messages to the end user, false
* to redirect to the given URL directly
*/
public boolean isCookiesDisabledNotificationEnabled() {
return cookiesDisabledNotificationEnabled;
@@ -296,8 +287,8 @@ public class SystemMessages implements Serializable {
}

/**
* Returns the message shown to the user when cookies are disabled in
* the browser.
* Returns the message shown to the user when cookies are disabled in the
* browser.
*
* @return The "cookies disabled" message
*/

+ 2
- 2
server/src/com/vaadin/server/UIProvider.java View File

@@ -23,8 +23,8 @@ public interface UIProvider {
public Class<? extends UI> getUIClass(VaadinSession application,
WrappedRequest request);

public UI createInstance(VaadinSession application, Class<? extends UI> type,
WrappedRequest request);
public UI createInstance(VaadinSession application,
Class<? extends UI> type, WrappedRequest request);

public String getPageTitleForUI(WrappedRequest request,
Class<? extends UI> uiClass);

+ 0
- 1
server/src/com/vaadin/server/UnsupportedBrowserHandler.java View File

@@ -18,7 +18,6 @@ package com.vaadin.server;
import java.io.IOException;
import java.io.Writer;


/**
* A {@link RequestHandler} that presents an informative page if the browser in
* use is unsupported. Recognizes Chrome Frame and allow it to be used.

+ 3
- 4
server/src/com/vaadin/server/VaadinPortlet.java View File

@@ -651,8 +651,7 @@ public class VaadinPortlet extends GenericPortlet implements Constants {
*/
private void handleOtherRequest(WrappedPortletRequest request,
WrappedResponse response, RequestType requestType,
VaadinSession application,
VaadinPortletSession applicationContext,
VaadinSession application, VaadinPortletSession applicationContext,
PortletCommunicationManager applicationManager)
throws PortletException, IOException, MalformedURLException {
if (requestType == RequestType.APPLICATION_RESOURCE
@@ -844,8 +843,8 @@ public class VaadinPortlet extends GenericPortlet implements Constants {
return newApplication;
}

protected VaadinPortletSession createApplication(
PortletRequest request) throws PortletException {
protected VaadinPortletSession createApplication(PortletRequest request)
throws PortletException {
VaadinPortletSession application = new VaadinPortletSession();

try {

+ 5
- 4
server/src/com/vaadin/server/VaadinServlet.java View File

@@ -625,8 +625,9 @@ public class VaadinServlet extends HttpServlet implements Constants {

}

private VaadinSession createAndRegisterApplication(HttpServletRequest request)
throws ServletException, MalformedURLException {
private VaadinSession createAndRegisterApplication(
HttpServletRequest request) throws ServletException,
MalformedURLException {
VaadinSession newApplication = createApplication(request);

try {
@@ -721,8 +722,8 @@ public class VaadinServlet extends HttpServlet implements Constants {
* @throws ServletException
* @throws MalformedURLException
*/
protected VaadinServletSession createApplication(
HttpServletRequest request) throws ServletException {
protected VaadinServletSession createApplication(HttpServletRequest request)
throws ServletException {
VaadinServletSession newApplication = new VaadinServletSession();

try {

+ 0
- 1
server/src/com/vaadin/server/WrappedHttpServletRequest.java View File

@@ -19,7 +19,6 @@ package com.vaadin.server;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletRequestWrapper;


/**
* Wrapper for {@link HttpServletRequest}.
*

+ 2
- 4
server/src/com/vaadin/ui/AbstractOrderedLayout.java View File

@@ -232,8 +232,7 @@ public abstract class AbstractOrderedLayout extends AbstractLayout implements
@Override
public void setComponentAlignment(Component childComponent,
Alignment alignment) {
ChildComponentData childData = getState().childData.get(
childComponent);
ChildComponentData childData = getState().childData.get(childComponent);
if (childData != null) {
// Alignments are bit masks
childData.alignmentBitmask = alignment.getBitMask();
@@ -252,8 +251,7 @@ public abstract class AbstractOrderedLayout extends AbstractLayout implements
*/
@Override
public Alignment getComponentAlignment(Component childComponent) {
ChildComponentData childData = getState().childData.get(
childComponent);
ChildComponentData childData = getState().childData.get(childComponent);
if (childData == null) {
throw new IllegalArgumentException(
"The given component is not a child of this layout");

+ 4
- 0
server/src/com/vaadin/ui/AbstractSelect.java View File

@@ -1493,6 +1493,7 @@ public abstract class AbstractSelect extends AbstractField<Object> implements
* @deprecated Since 7.0, replaced by
* {@link #addPropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.PropertySetChangeListener listener) {
addPropertySetChangeListener(listener);
@@ -1518,6 +1519,7 @@ public abstract class AbstractSelect extends AbstractField<Object> implements
* @deprecated Since 7.0, replaced by
* {@link #removePropertySetChangeListener(com.vaadin.data.Container.PropertySetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.PropertySetChangeListener listener) {
removePropertySetChangeListener(listener);
@@ -1541,6 +1543,7 @@ public abstract class AbstractSelect extends AbstractField<Object> implements
* @deprecated Since 7.0, replaced by
* {@link #addItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void addListener(Container.ItemSetChangeListener listener) {
addItemSetChangeListener(listener);
@@ -1566,6 +1569,7 @@ public abstract class AbstractSelect extends AbstractField<Object> implements
* @deprecated Since 7.0, replaced by
* {@link #removeItemSetChangeListener(com.vaadin.data.Container.ItemSetChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Container.ItemSetChangeListener listener) {
removeItemSetChangeListener(listener);

+ 5
- 0
server/src/com/vaadin/ui/ComboBox.java View File

@@ -701,6 +701,7 @@ public class ComboBox extends AbstractSelect implements
return filteringMode;
}

@Override
public void addBlurListener(BlurListener listener) {
addListener(BlurEvent.EVENT_ID, BlurEvent.class, listener,
BlurListener.blurMethod);
@@ -709,6 +710,7 @@ public class ComboBox extends AbstractSelect implements
/**
* @deprecated Since 7.0, replaced by {@link #addBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void addListener(BlurListener listener) {
addBlurListener(listener);
@@ -723,6 +725,7 @@ public class ComboBox extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #removeBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void removeListener(BlurListener listener) {
removeBlurListener(listener);
@@ -738,6 +741,7 @@ public class ComboBox extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #addFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void addListener(FocusListener listener) {
addFocusListener(listener);
@@ -752,6 +756,7 @@ public class ComboBox extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #removeFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void removeListener(FocusListener listener) {
removeFocusListener(listener);

+ 4
- 0
server/src/com/vaadin/ui/CustomField.java View File

@@ -230,6 +230,7 @@ public abstract class CustomField<T> extends AbstractField<T> implements
* @deprecated Since 7.0, replaced by
* {@link #addComponentAttachListener(com.vaadin.ui.ComponentContainer.ComponentAttachListener)}
**/
@Override
@Deprecated
public void addListener(ComponentAttachListener listener) {
addComponentAttachListener(listener);
@@ -245,6 +246,7 @@ public abstract class CustomField<T> extends AbstractField<T> implements
* @deprecated Since 7.0, replaced by
* {@link #removeComponentAttachListener(com.vaadin.ui.ComponentContainer.ComponentAttachListener)}
**/
@Override
@Deprecated
public void removeListener(ComponentAttachListener listener) {
removeComponentAttachListener(listener);
@@ -259,6 +261,7 @@ public abstract class CustomField<T> extends AbstractField<T> implements
* @deprecated Since 7.0, replaced by
* {@link #addComponentDetachListener(com.vaadin.ui.ComponentContainer.ComponentDetachListener)}
**/
@Override
@Deprecated
public void addListener(ComponentDetachListener listener) {
addComponentDetachListener(listener);
@@ -274,6 +277,7 @@ public abstract class CustomField<T> extends AbstractField<T> implements
* @deprecated Since 7.0, replaced by
* {@link #removeComponentDetachListener(com.vaadin.ui.ComponentContainer.ComponentDetachListener)}
**/
@Override
@Deprecated
public void removeListener(ComponentDetachListener listener) {
removeComponentDetachListener(listener);

+ 5
- 1
server/src/com/vaadin/ui/DateField.java View File

@@ -36,9 +36,9 @@ import com.vaadin.event.FieldEvents.BlurEvent;
import com.vaadin.event.FieldEvents.BlurListener;
import com.vaadin.event.FieldEvents.FocusEvent;
import com.vaadin.event.FieldEvents.FocusListener;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.LegacyComponent;
import com.vaadin.shared.ui.datefield.DateFieldConstants;

/**
@@ -750,6 +750,7 @@ public class DateField extends AbstractField<Date> implements
* @deprecated Since 7.0, replaced by
* {@link #addFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void addListener(FocusListener listener) {
addFocusListener(listener);
@@ -764,6 +765,7 @@ public class DateField extends AbstractField<Date> implements
* @deprecated Since 7.0, replaced by
* {@link #removeFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void removeListener(FocusListener listener) {
removeFocusListener(listener);
@@ -778,6 +780,7 @@ public class DateField extends AbstractField<Date> implements
/**
* @deprecated Since 7.0, replaced by {@link #addBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void addListener(BlurListener listener) {
addBlurListener(listener);
@@ -792,6 +795,7 @@ public class DateField extends AbstractField<Date> implements
* @deprecated Since 7.0, replaced by
* {@link #removeBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void removeListener(BlurListener listener) {
removeBlurListener(listener);

+ 1
- 1
server/src/com/vaadin/ui/DragAndDropWrapper.java View File

@@ -29,10 +29,10 @@ import com.vaadin.event.dd.DropHandler;
import com.vaadin.event.dd.DropTarget;
import com.vaadin.event.dd.TargetDetails;
import com.vaadin.event.dd.TargetDetailsImpl;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.StreamVariable;
import com.vaadin.server.LegacyComponent;
import com.vaadin.shared.MouseEventDetails;
import com.vaadin.shared.ui.dd.HorizontalDropLocation;
import com.vaadin.shared.ui.dd.VerticalDropLocation;

+ 1
- 1
server/src/com/vaadin/ui/Embedded.java View File

@@ -22,10 +22,10 @@ import java.util.Map;

import com.vaadin.event.MouseEvents.ClickEvent;
import com.vaadin.event.MouseEvents.ClickListener;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.Resource;
import com.vaadin.server.LegacyComponent;
import com.vaadin.shared.EventId;
import com.vaadin.shared.MouseEventDetails;
import com.vaadin.shared.ui.embedded.EmbeddedConstants;

+ 2
- 4
server/src/com/vaadin/ui/Flash.java View File

@@ -34,8 +34,7 @@ public class Flash extends AbstractEmbedded {
*/
public void setCodebase(String codebase) {
if (codebase != getState().codebase
|| (codebase != null && !codebase.equals(getState()
.codebase))) {
|| (codebase != null && !codebase.equals(getState().codebase))) {
getState().codebase = codebase;
requestRepaint();
}
@@ -53,8 +52,7 @@ public class Flash extends AbstractEmbedded {
*/
public void setCodetype(String codetype) {
if (codetype != getState().codetype
|| (codetype != null && !codetype.equals(getState()
.codetype))) {
|| (codetype != null && !codetype.equals(getState().codetype))) {
getState().codetype = codetype;
requestRepaint();
}

+ 2
- 0
server/src/com/vaadin/ui/Label.java View File

@@ -357,6 +357,7 @@ public class Label extends AbstractComponent implements Property<String>,
* @deprecated Since 7.0, replaced by
* {@link #addValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void addListener(Property.ValueChangeListener listener) {
addValueChangeListener(listener);
@@ -379,6 +380,7 @@ public class Label extends AbstractComponent implements Property<String>,
* @deprecated Since 7.0, replaced by
* {@link #removeValueChangeListener(com.vaadin.data.Property.ValueChangeListener)}
**/
@Override
@Deprecated
public void removeListener(Property.ValueChangeListener listener) {
removeValueChangeListener(listener);

+ 1
- 1
server/src/com/vaadin/ui/MenuBar.java View File

@@ -22,10 +22,10 @@ import java.util.List;
import java.util.Map;
import java.util.Stack;

import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.Resource;
import com.vaadin.server.LegacyComponent;
import com.vaadin.shared.ui.menubar.MenuBarConstants;

/**

+ 4
- 0
server/src/com/vaadin/ui/OptionGroup.java View File

@@ -97,6 +97,7 @@ public class OptionGroup extends AbstractSelect implements
/**
* @deprecated Since 7.0, replaced by {@link #addBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void addListener(BlurListener listener) {
addBlurListener(listener);
@@ -111,6 +112,7 @@ public class OptionGroup extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #removeBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void removeListener(BlurListener listener) {
removeBlurListener(listener);
@@ -126,6 +128,7 @@ public class OptionGroup extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #addFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void addListener(FocusListener listener) {
addFocusListener(listener);
@@ -141,6 +144,7 @@ public class OptionGroup extends AbstractSelect implements
* @deprecated Since 7.0, replaced by
* {@link #removeFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void removeListener(FocusListener listener) {
removeFocusListener(listener);

+ 1
- 1
server/src/com/vaadin/ui/ProgressIndicator.java View File

@@ -20,9 +20,9 @@ import java.util.Map;

import com.vaadin.data.Property;
import com.vaadin.data.util.ObjectProperty;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.LegacyComponent;

/**
* <code>ProgressIndicator</code> is component that shows user state of a

+ 1
- 1
server/src/com/vaadin/ui/RichTextArea.java View File

@@ -19,9 +19,9 @@ package com.vaadin.ui;
import java.util.Map;

import com.vaadin.data.Property;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.LegacyComponent;

/**
* A simple RichTextArea to edit HTML format text.

+ 4
- 0
server/src/com/vaadin/ui/TabSheet.java View File

@@ -1238,6 +1238,7 @@ public class TabSheet extends AbstractComponentContainer implements Focusable,
/**
* @deprecated Since 7.0, replaced by {@link #addBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void addListener(BlurListener listener) {
addBlurListener(listener);
@@ -1252,6 +1253,7 @@ public class TabSheet extends AbstractComponentContainer implements Focusable,
* @deprecated Since 7.0, replaced by
* {@link #removeBlurListener(BlurListener)}
**/
@Override
@Deprecated
public void removeListener(BlurListener listener) {
removeBlurListener(listener);
@@ -1267,6 +1269,7 @@ public class TabSheet extends AbstractComponentContainer implements Focusable,
* @deprecated Since 7.0, replaced by
* {@link #addFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void addListener(FocusListener listener) {
addFocusListener(listener);
@@ -1281,6 +1284,7 @@ public class TabSheet extends AbstractComponentContainer implements Focusable,
* @deprecated Since 7.0, replaced by
* {@link #removeFocusListener(FocusListener)}
**/
@Override
@Deprecated
public void removeListener(FocusListener listener) {
removeFocusListener(listener);

+ 2
- 0
server/src/com/vaadin/ui/Table.java View File

@@ -4626,6 +4626,7 @@ public class Table extends AbstractSelect implements Action.Container,
* @deprecated Since 7.0, replaced by
* {@link #addItemClickListener(ItemClickListener)}
**/
@Override
@Deprecated
public void addListener(ItemClickListener listener) {
addItemClickListener(listener);
@@ -4641,6 +4642,7 @@ public class Table extends AbstractSelect implements Action.Container,
* @deprecated Since 7.0, replaced by
* {@link #removeItemClickListener(ItemClickListener)}
**/
@Override
@Deprecated
public void removeListener(ItemClickListener listener) {
removeItemClickListener(listener);

+ 2
- 0
server/src/com/vaadin/ui/Tree.java View File

@@ -1205,6 +1205,7 @@ public class Tree extends AbstractSelect implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #addItemClickListener(ItemClickListener)}
**/
@Override
@Deprecated
public void addListener(ItemClickListener listener) {
addItemClickListener(listener);
@@ -1220,6 +1221,7 @@ public class Tree extends AbstractSelect implements Container.Hierarchical,
* @deprecated Since 7.0, replaced by
* {@link #removeItemClickListener(ItemClickListener)}
**/
@Override
@Deprecated
public void removeListener(ItemClickListener listener) {
removeItemClickListener(listener);

+ 2
- 2
server/src/com/vaadin/ui/UI.java View File

@@ -956,8 +956,8 @@ public abstract class UI extends AbstractComponentContainer implements
throw new IllegalStateException("UI id has already been defined");
}
this.uiId = uiId;
theme = getSession().getUiProvider(request, getClass())
.getThemeForUI(request, getClass());
theme = getSession().getUiProvider(request, getClass()).getThemeForUI(
request, getClass());

getPage().init(request);


+ 1
- 1
server/src/com/vaadin/ui/Upload.java View File

@@ -24,11 +24,11 @@ import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;

import com.vaadin.server.LegacyComponent;
import com.vaadin.server.NoInputStreamException;
import com.vaadin.server.NoOutputStreamException;
import com.vaadin.server.PaintException;
import com.vaadin.server.PaintTarget;
import com.vaadin.server.LegacyComponent;
import com.vaadin.server.StreamVariable.StreamingProgressEvent;

/**

+ 216
- 90
server/src/org/jsoup/Connection.java View File

@@ -1,24 +1,29 @@
package org.jsoup;

import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;

import java.io.IOException;
import java.net.URL;
import java.util.Map;
import java.util.Collection;
import java.io.IOException;
import java.util.Map;

import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;

/**
* A Connection provides a convenient interface to fetch content from the web, and parse it into Documents.
* A Connection provides a convenient interface to fetch content from the web,
* and parse it into Documents.
* <p>
* To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}. Connections contain {@link Connection.Request}
* and {@link Connection.Response} objects. The request objects are reusable as prototype requests.
* To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}.
* Connections contain {@link Connection.Request} and
* {@link Connection.Response} objects. The request objects are reusable as
* prototype requests.
* <p>
* Request configuration can be made using either the shortcut methods in Connection (e.g. {@link #userAgent(String)}),
* or by methods in the Connection.Request object directly. All request configuration must be made before the request
* is executed.
* Request configuration can be made using either the shortcut methods in
* Connection (e.g. {@link #userAgent(String)}), or by methods in the
* Connection.Request object directly. All request configuration must be made
* before the request is executed.
* <p>
* The Connection interface is <b>currently in beta</b> and subject to change. Comments, suggestions, and bug reports are welcome.
* The Connection interface is <b>currently in beta</b> and subject to change.
* Comments, suggestions, and bug reports are welcome.
*/
public interface Connection {

@@ -31,102 +36,140 @@ public interface Connection {

/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
* @param url URL to connect to
*
* @param url
* URL to connect to
* @return this Connection, for chaining
*/
public Connection url(URL url);

/**
* Set the request URL to fetch. The protocol must be HTTP or HTTPS.
* @param url URL to connect to
*
* @param url
* URL to connect to
* @return this Connection, for chaining
*/
public Connection url(String url);

/**
* Set the request user-agent header.
* @param userAgent user-agent to use
*
* @param userAgent
* user-agent to use
* @return this Connection, for chaining
*/
public Connection userAgent(String userAgent);

/**
* Set the request timeouts (connect and read). If a timeout occurs, an IOException will be thrown. The default
* timeout is 3 seconds (3000 millis). A timeout of zero is treated as an infinite timeout.
* @param millis number of milliseconds (thousandths of a second) before timing out connects or reads.
* Set the request timeouts (connect and read). If a timeout occurs, an
* IOException will be thrown. The default timeout is 3 seconds (3000
* millis). A timeout of zero is treated as an infinite timeout.
*
* @param millis
* number of milliseconds (thousandths of a second) before timing
* out connects or reads.
* @return this Connection, for chaining
*/
public Connection timeout(int millis);

/**
* Set the request referrer (aka "referer") header.
* @param referrer referrer to use
*
* @param referrer
* referrer to use
* @return this Connection, for chaining
*/
public Connection referrer(String referrer);

/**
* Configures the connection to (not) follow server redirects. By default this is <b>true</b>.
* @param followRedirects true if server redirects should be followed.
* Configures the connection to (not) follow server redirects. By default
* this is <b>true</b>.
*
* @param followRedirects
* true if server redirects should be followed.
* @return this Connection, for chaining
*/
public Connection followRedirects(boolean followRedirects);

/**
* Set the request method to use, GET or POST. Default is GET.
* @param method HTTP request method
*
* @param method
* HTTP request method
* @return this Connection, for chaining
*/
public Connection method(Method method);

/**
* Configures the connection to not throw exceptions when a HTTP error occurs. (4xx - 5xx, e.g. 404 or 500). By
* default this is <b>false</b>; an IOException is thrown if an error is encountered. If set to <b>true</b>, the
* response is populated with the error body, and the status message will reflect the error.
* @param ignoreHttpErrors true if HTTP errors should be ignored; false (default) if they should cause an IOException to be thrown.
* Configures the connection to not throw exceptions when a HTTP error
* occurs. (4xx - 5xx, e.g. 404 or 500). By default this is <b>false</b>; an
* IOException is thrown if an error is encountered. If set to <b>true</b>,
* the response is populated with the error body, and the status message
* will reflect the error.
*
* @param ignoreHttpErrors
* true if HTTP errors should be ignored; false (default) if they
* should cause an IOException to be thrown.
* @return this Connection, for chaining
*/
public Connection ignoreHttpErrors(boolean ignoreHttpErrors);

/**
* Ignore the document's Content-Type when parsing the response. By default this is <b>false</b>, an unrecognised
* content-type will cause an IOException to be thrown. (This is to prevent producing garbage by attempting to parse
* a JPEG binary image, for example.) Set to true to force a parse attempt regardless of content type.
* @param ignoreContentType set to true if you would like the content type ignored on parsing the response into a
* Document.
* Ignore the document's Content-Type when parsing the response. By default
* this is <b>false</b>, an unrecognised content-type will cause an
* IOException to be thrown. (This is to prevent producing garbage by
* attempting to parse a JPEG binary image, for example.) Set to true to
* force a parse attempt regardless of content type.
*
* @param ignoreContentType
* set to true if you would like the content type ignored on
* parsing the response into a Document.
* @return this Connection, for chaining
*/
public Connection ignoreContentType(boolean ignoreContentType);

/**
* Add a request data parameter. Request parameters are sent in the request query string for GETs, and in the request
* body for POSTs. A request may have multiple values of the same name.
* @param key data key
* @param value data value
* Add a request data parameter. Request parameters are sent in the request
* query string for GETs, and in the request body for POSTs. A request may
* have multiple values of the same name.
*
* @param key
* data key
* @param value
* data value
* @return this Connection, for chaining
*/
public Connection data(String key, String value);

/**
* Adds all of the supplied data to the request data parameters
* @param data map of data parameters
*
* @param data
* map of data parameters
* @return this Connection, for chaining
*/
public Connection data(Map<String, String> data);

/**
* Add a number of request data parameters. Multiple parameters may be set at once, e.g.:
* <code>.data("name", "jsoup", "language", "Java", "language", "English");</code> creates a query string like:
* Add a number of request data parameters. Multiple parameters may be set
* at once, e.g.:
* <code>.data("name", "jsoup", "language", "Java", "language", "English");</code>
* creates a query string like:
* <code>?name=jsoup&amp;language=Java&amp;language=English</code>
* @param keyvals a set of key value pairs.
*
* @param keyvals
* a set of key value pairs.
* @return this Connection, for chaining
*/
public Connection data(String... keyvals);

/**
* Set a request header.
* @param name header name
* @param value header value
*
* @param name
* header name
* @param value
* header value
* @return this Connection, for chaining
* @see org.jsoup.Connection.Request#headers()
*/
@@ -134,111 +177,141 @@ public interface Connection {

/**
* Set a cookie to be sent in the request.
* @param name name of cookie
* @param value value of cookie
*
* @param name
* name of cookie
* @param value
* value of cookie
* @return this Connection, for chaining
*/
public Connection cookie(String name, String value);

/**
* Adds each of the supplied cookies to the request.
* @param cookies map of cookie name -> value pairs
*
* @param cookies
* map of cookie name -> value pairs
* @return this Connection, for chaining
*/
public Connection cookies(Map<String, String> cookies);

/**
* Provide an alternate parser to use when parsing the response to a Document.
* @param parser alternate parser
* Provide an alternate parser to use when parsing the response to a
* Document.
*
* @param parser
* alternate parser
* @return this Connection, for chaining
*/
public Connection parser(Parser parser);

/**
* Execute the request as a GET, and parse the result.
*
* @return parsed Document
* @throws IOException on error
* @throws IOException
* on error
*/
public Document get() throws IOException;

/**
* Execute the request as a POST, and parse the result.
*
* @return parsed Document
* @throws IOException on error
* @throws IOException
* on error
*/
public Document post() throws IOException;

/**
* Execute the request.
*
* @return a response object
* @throws IOException on error
* @throws IOException
* on error
*/
public Response execute() throws IOException;

/**
* Get the request object associated with this connection
*
* @return request
*/
public Request request();

/**
* Set the connection's request
* @param request new request object
*
* @param request
* new request object
* @return this Connection, for chaining
*/
public Connection request(Request request);

/**
* Get the response, once the request has been executed
*
* @return response
*/
public Response response();

/**
* Set the connection's response
* @param response new response
*
* @param response
* new response
* @return this Connection, for chaining
*/
public Connection response(Response response);


/**
* Common methods for Requests and Responses
* @param <T> Type of Base, either Request or Response
*
* @param <T>
* Type of Base, either Request or Response
*/
interface Base<T extends Base> {

/**
* Get the URL
*
* @return URL
*/
public URL url();

/**
* Set the URL
* @param url new URL
*
* @param url
* new URL
* @return this, for chaining
*/
public T url(URL url);

/**
* Get the request method
*
* @return method
*/
public Method method();

/**
* Set the request method
* @param method new method
*
* @param method
* new method
* @return this, for chaining
*/
public T method(Method method);

/**
* Get the value of a header. This is a simplified header model, where a header may only have one value.
* Get the value of a header. This is a simplified header model, where a
* header may only have one value.
* <p>
* Header names are case insensitive.
* @param name name of header (case insensitive)
*
* @param name
* name of header (case insensitive)
* @return value of header, or null if not set.
* @see #hasHeader(String)
* @see #cookie(String)
@@ -246,29 +319,38 @@ public interface Connection {
public String header(String name);

/**
* Set a header. This method will overwrite any existing header with the same case insensitive name.
* @param name Name of header
* @param value Value of header
* Set a header. This method will overwrite any existing header with the
* same case insensitive name.
*
* @param name
* Name of header
* @param value
* Value of header
* @return this, for chaining
*/
public T header(String name, String value);

/**
* Check if a header is present
* @param name name of header (case insensitive)
*
* @param name
* name of header (case insensitive)
* @return if the header is present in this request/response
*/
public boolean hasHeader(String name);

/**
* Remove a header by name
* @param name name of header to remove (case insensitive)
*
* @param name
* name of header to remove (case insensitive)
* @return this, for chaining
*/
public T removeHeader(String name);

/**
* Retrieve all of the request/response headers as a map
*
* @return headers
*/
public Map<String, String> headers();
@@ -276,37 +358,48 @@ public interface Connection {
/**
* Get a cookie value by name from this request/response.
* <p>
* Response objects have a simplified cookie model. Each cookie set in the response is added to the response
* object's cookie key=value map. The cookie's path, domain, and expiry date are ignored.
* @param name name of cookie to retrieve.
* Response objects have a simplified cookie model. Each cookie set in
* the response is added to the response object's cookie key=value map.
* The cookie's path, domain, and expiry date are ignored.
*
* @param name
* name of cookie to retrieve.
* @return value of cookie, or null if not set
*/
public String cookie(String name);

/**
* Set a cookie in this request/response.
* @param name name of cookie
* @param value value of cookie
*
* @param name
* name of cookie
* @param value
* value of cookie
* @return this, for chaining
*/
public T cookie(String name, String value);

/**
* Check if a cookie is present
* @param name name of cookie
*
* @param name
* name of cookie
* @return if the cookie is present in this request/response
*/
public boolean hasCookie(String name);

/**
* Remove a cookie by name
* @param name name of cookie to remove
*
* @param name
* name of cookie to remove
* @return this, for chaining
*/
public T removeCookie(String name);

/**
* Retrieve all of the request/response cookies as a map
*
* @return cookies
*/
public Map<String, String> cookies();
@@ -319,79 +412,99 @@ public interface Connection {
public interface Request extends Base<Request> {
/**
* Get the request timeout, in milliseconds.
*
* @return the timeout in milliseconds.
*/
public int timeout();

/**
* Update the request timeout.
* @param millis timeout, in milliseconds
*
* @param millis
* timeout, in milliseconds
* @return this Request, for chaining
*/
public Request timeout(int millis);

/**
* Get the current followRedirects configuration.
*
* @return true if followRedirects is enabled.
*/
public boolean followRedirects();

/**
* Configures the request to (not) follow server redirects. By default this is <b>true</b>.
*
* @param followRedirects true if server redirects should be followed.
* Configures the request to (not) follow server redirects. By default
* this is <b>true</b>.
*
* @param followRedirects
* true if server redirects should be followed.
* @return this Request, for chaining
*/
public Request followRedirects(boolean followRedirects);

/**
* Get the current ignoreHttpErrors configuration.
* @return true if errors will be ignored; false (default) if HTTP errors will cause an IOException to be thrown.
*
* @return true if errors will be ignored; false (default) if HTTP
* errors will cause an IOException to be thrown.
*/
public boolean ignoreHttpErrors();

/**
* Configures the request to ignore HTTP errors in the response.
* @param ignoreHttpErrors set to true to ignore HTTP errors.
/**
* Configures the request to ignore HTTP errors in the response.
*
* @param ignoreHttpErrors
* set to true to ignore HTTP errors.
* @return this Request, for chaining
*/
*/
public Request ignoreHttpErrors(boolean ignoreHttpErrors);

/**
* Get the current ignoreContentType configuration.
* @return true if invalid content-types will be ignored; false (default) if they will cause an IOException to be thrown.
*
* @return true if invalid content-types will be ignored; false
* (default) if they will cause an IOException to be thrown.
*/
public boolean ignoreContentType();

/**
* Configures the request to ignore the Content-Type of the response.
* @param ignoreContentType set to true to ignore the content type.
* Configures the request to ignore the Content-Type of the response.
*
* @param ignoreContentType
* set to true to ignore the content type.
* @return this Request, for chaining
*/
*/
public Request ignoreContentType(boolean ignoreContentType);

/**
* Add a data parameter to the request
* @param keyval data to add.
*
* @param keyval
* data to add.
* @return this Request, for chaining
*/
public Request data(KeyVal keyval);

/**
* Get all of the request's data parameters
*
* @return collection of keyvals
*/
public Collection<KeyVal> data();

/**
* Specify the parser to use when parsing the document.
* @param parser parser to use.
*
* @param parser
* parser to use.
* @return this Request, for chaining
*/
public Request parser(Parser parser);

/**
* Get the current parser to use when parsing the document.
*
* @return current Parser
*/
public Parser parser();
@@ -401,46 +514,54 @@ public interface Connection {
* Represents a HTTP response.
*/
public interface Response extends Base<Response> {
/**
/**
* Get the status code of the response.
*
* @return status code
*/
public int statusCode();

/**
* Get the status message of the response.
*
* @return status message
*/
public String statusMessage();

/**
* Get the character set name of the response.
*
* @return character set name
*/
public String charset();

/**
* Get the response content type (e.g. "text/html").
*
* @return the response content type
*/
public String contentType();

/**
* Parse the body of the response as a Document.
*
* @return a parsed Document
* @throws IOException on error
* @throws IOException
* on error
*/
public Document parse() throws IOException;

/**
* Get the body of the response as a plain string.
*
* @return body
*/
public String body();

/**
* Get the body of the response as an array of bytes.
*
* @return body bytes
*/
public byte[] bodyAsBytes();
@@ -453,29 +574,34 @@ public interface Connection {

/**
* Update the key of a keyval
* @param key new key
*
* @param key
* new key
* @return this KeyVal, for chaining
*/
public KeyVal key(String key);

/**
* Get the key of a keyval
*
* @return the key
*/
public String key();

/**
* Update the value of a keyval
* @param value the new value
*
* @param value
* the new value
* @return this KeyVal, for chaining
*/
public KeyVal value(String value);

/**
* Get the value of a keyval
*
* @return the value
*/
public String value();
}
}


+ 194
- 130
server/src/org/jsoup/Jsoup.java View File

@@ -1,178 +1,233 @@
package org.jsoup;

import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;
import org.jsoup.helper.DataUtil;
import org.jsoup.helper.HttpConnection;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
The core public access point to the jsoup functionality.
import org.jsoup.helper.DataUtil;
import org.jsoup.helper.HttpConnection;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;

@author Jonathan Hedley */
/**
* The core public access point to the jsoup functionality.
*
* @author Jonathan Hedley
*/
public class Jsoup {
private Jsoup() {}
private Jsoup() {
}

/**
Parse HTML into a Document. The parser will make a sensible, balanced document tree out of any HTML.

@param html HTML to parse
@param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
before the HTML declares a {@code <base href>} tag.
@return sane HTML
* Parse HTML into a Document. The parser will make a sensible, balanced
* document tree out of any HTML.
*
* @param html
* HTML to parse
* @param baseUri
* The URL where the HTML was retrieved from. Used to resolve
* relative URLs to absolute URLs, that occur before the HTML
* declares a {@code <base href>} tag.
* @return sane HTML
*/
public static Document parse(String html, String baseUri) {
return Parser.parse(html, baseUri);
}

/**
Parse HTML into a Document, using the provided Parser. You can provide an alternate parser, such as a simple XML
(non-HTML) parser.

@param html HTML to parse
@param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that occur
before the HTML declares a {@code <base href>} tag.
@param parser alternate {@link Parser#xmlParser() parser} to use.
@return sane HTML
* Parse HTML into a Document, using the provided Parser. You can provide an
* alternate parser, such as a simple XML (non-HTML) parser.
*
* @param html
* HTML to parse
* @param baseUri
* The URL where the HTML was retrieved from. Used to resolve
* relative URLs to absolute URLs, that occur before the HTML
* declares a {@code <base href>} tag.
* @param parser
* alternate {@link Parser#xmlParser() parser} to use.
* @return sane HTML
*/
public static Document parse(String html, String baseUri, Parser parser) {
return parser.parseInput(html, baseUri);
}

/**
Parse HTML into a Document. As no base URI is specified, absolute URL detection relies on the HTML including a
{@code <base href>} tag.
@param html HTML to parse
@return sane HTML
@see #parse(String, String)
* Parse HTML into a Document. As no base URI is specified, absolute URL
* detection relies on the HTML including a {@code <base href>} tag.
*
* @param html
* HTML to parse
* @return sane HTML
* @see #parse(String, String)
*/
public static Document parse(String html) {
return Parser.parse(html, "");
}

/**
* Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML page.
* Creates a new {@link Connection} to a URL. Use to fetch and parse a HTML
* page.
* <p>
* Use examples:
* <ul>
* <li><code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code></li>
* <li><code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code></li>
* <li>
* <code>Document doc = Jsoup.connect("http://example.com").userAgent("Mozilla").data("name", "jsoup").get();</code>
* </li>
* <li>
* <code>Document doc = Jsoup.connect("http://example.com").cookie("auth", "token").post();</code>
* </li>
* </ul>
* @param url URL to connect to. The protocol must be {@code http} or {@code https}.
* @return the connection. You can add data, cookies, and headers; set the user-agent, referrer, method; and then execute.
*
* @param url
* URL to connect to. The protocol must be {@code http} or
* {@code https}.
* @return the connection. You can add data, cookies, and headers; set the
* user-agent, referrer, method; and then execute.
*/
public static Connection connect(String url) {
return HttpConnection.connect(url);
}

/**
Parse the contents of a file as HTML.

@param in file to load HTML from
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
@return sane HTML

@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
* Parse the contents of a file as HTML.
*
* @param in
* file to load HTML from
* @param charsetName
* (optional) character set of file contents. Set to {@code null}
* to determine from {@code http-equiv} meta tag, if present, or
* fall back to {@code UTF-8} (which is often safe to do).
* @param baseUri
* The URL where the HTML was retrieved from, to resolve relative
* links against.
* @return sane HTML
* @throws IOException
* if the file could not be found, or read, or if the
* charsetName is invalid.
*/
public static Document parse(File in, String charsetName, String baseUri) throws IOException {
public static Document parse(File in, String charsetName, String baseUri)
throws IOException {
return DataUtil.load(in, charsetName, baseUri);
}

/**
Parse the contents of a file as HTML. The location of the file is used as the base URI to qualify relative URLs.

@param in file to load HTML from
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@return sane HTML

@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
@see #parse(File, String, String)
* Parse the contents of a file as HTML. The location of the file is used as
* the base URI to qualify relative URLs.
*
* @param in
* file to load HTML from
* @param charsetName
* (optional) character set of file contents. Set to {@code null}
* to determine from {@code http-equiv} meta tag, if present, or
* fall back to {@code UTF-8} (which is often safe to do).
* @return sane HTML
* @throws IOException
* if the file could not be found, or read, or if the
* charsetName is invalid.
* @see #parse(File, String, String)
*/
public static Document parse(File in, String charsetName) throws IOException {
public static Document parse(File in, String charsetName)
throws IOException {
return DataUtil.load(in, charsetName, in.getAbsolutePath());
}

/**
Read an input stream, and parse it to a Document.

@param in input stream to read. Make sure to close it after parsing.
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
@return sane HTML

@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
/**
* Read an input stream, and parse it to a Document.
*
* @param in
* input stream to read. Make sure to close it after parsing.
* @param charsetName
* (optional) character set of file contents. Set to {@code null}
* to determine from {@code http-equiv} meta tag, if present, or
* fall back to {@code UTF-8} (which is often safe to do).
* @param baseUri
* The URL where the HTML was retrieved from, to resolve relative
* links against.
* @return sane HTML
* @throws IOException
* if the file could not be found, or read, or if the
* charsetName is invalid.
*/
public static Document parse(InputStream in, String charsetName, String baseUri) throws IOException {
public static Document parse(InputStream in, String charsetName,
String baseUri) throws IOException {
return DataUtil.load(in, charsetName, baseUri);
}

/**
Read an input stream, and parse it to a Document. You can provide an alternate parser, such as a simple XML
(non-HTML) parser.

@param in input stream to read. Make sure to close it after parsing.
@param charsetName (optional) character set of file contents. Set to {@code null} to determine from {@code http-equiv} meta tag, if
present, or fall back to {@code UTF-8} (which is often safe to do).
@param baseUri The URL where the HTML was retrieved from, to resolve relative links against.
@param parser alternate {@link Parser#xmlParser() parser} to use.
@return sane HTML

@throws IOException if the file could not be found, or read, or if the charsetName is invalid.
* Read an input stream, and parse it to a Document. You can provide an
* alternate parser, such as a simple XML (non-HTML) parser.
*
* @param in
* input stream to read. Make sure to close it after parsing.
* @param charsetName
* (optional) character set of file contents. Set to {@code null}
* to determine from {@code http-equiv} meta tag, if present, or
* fall back to {@code UTF-8} (which is often safe to do).
* @param baseUri
* The URL where the HTML was retrieved from, to resolve relative
* links against.
* @param parser
* alternate {@link Parser#xmlParser() parser} to use.
* @return sane HTML
* @throws IOException
* if the file could not be found, or read, or if the
* charsetName is invalid.
*/
public static Document parse(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
public static Document parse(InputStream in, String charsetName,
String baseUri, Parser parser) throws IOException {
return DataUtil.load(in, charsetName, baseUri, parser);
}

/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.

@param bodyHtml body HTML fragment
@param baseUri URL to resolve relative URLs against.
@return sane HTML document

@see Document#body()
* Parse a fragment of HTML, with the assumption that it forms the
* {@code body} of the HTML.
*
* @param bodyHtml
* body HTML fragment
* @param baseUri
* URL to resolve relative URLs against.
* @return sane HTML document
* @see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
return Parser.parseBodyFragment(bodyHtml, baseUri);
}

/**
Parse a fragment of HTML, with the assumption that it forms the {@code body} of the HTML.

@param bodyHtml body HTML fragment
@return sane HTML document

@see Document#body()
* Parse a fragment of HTML, with the assumption that it forms the
* {@code body} of the HTML.
*
* @param bodyHtml
* body HTML fragment
* @return sane HTML document
* @see Document#body()
*/
public static Document parseBodyFragment(String bodyHtml) {
return Parser.parseBodyFragment(bodyHtml, "");
}

/**
Fetch a URL, and parse it as HTML. Provided for compatibility; in most cases use {@link #connect(String)} instead.
<p>
The encoding character set is determined by the content-type header or http-equiv meta tag, or falls back to {@code UTF-8}.

@param url URL to fetch (with a GET). The protocol must be {@code http} or {@code https}.
@param timeoutMillis Connection and read timeout, in milliseconds. If exceeded, IOException is thrown.
@return The parsed HTML.

@throws IOException If the final server response != 200 OK (redirects are followed), or if there's an error reading
the response stream.

@see #connect(String)
* Fetch a URL, and parse it as HTML. Provided for compatibility; in most
* cases use {@link #connect(String)} instead.
* <p>
* The encoding character set is determined by the content-type header or
* http-equiv meta tag, or falls back to {@code UTF-8}.
*
* @param url
* URL to fetch (with a GET). The protocol must be {@code http}
* or {@code https}.
* @param timeoutMillis
* Connection and read timeout, in milliseconds. If exceeded,
* IOException is thrown.
* @return The parsed HTML.
* @throws IOException
* If the final server response != 200 OK (redirects are
* followed), or if there's an error reading the response
* stream.
* @see #connect(String)
*/
public static Document parse(URL url, int timeoutMillis) throws IOException {
Connection con = HttpConnection.connect(url);
@@ -181,17 +236,20 @@ public class Jsoup {
}

/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.

@param bodyHtml input untrusted HTML
@param baseUri URL to resolve relative URLs against
@param whitelist white-list of permitted HTML elements
@return safe HTML

@see Cleaner#clean(Document)
* Get safe HTML from untrusted input HTML, by parsing input HTML and
* filtering it through a white-list of permitted tags and attributes.
*
* @param bodyHtml
* input untrusted HTML
* @param baseUri
* URL to resolve relative URLs against
* @param whitelist
* white-list of permitted HTML elements
* @return safe HTML
* @see Cleaner#clean(Document)
*/
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist) {
public static String clean(String bodyHtml, String baseUri,
Whitelist whitelist) {
Document dirty = parseBodyFragment(bodyHtml, baseUri);
Cleaner cleaner = new Cleaner(whitelist);
Document clean = cleaner.clean(dirty);
@@ -199,31 +257,37 @@ public class Jsoup {
}

/**
Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a white-list of permitted
tags and attributes.

@param bodyHtml input untrusted HTML
@param whitelist white-list of permitted HTML elements
@return safe HTML

@see Cleaner#clean(Document)
* Get safe HTML from untrusted input HTML, by parsing input HTML and
* filtering it through a white-list of permitted tags and attributes.
*
* @param bodyHtml
* input untrusted HTML
* @param whitelist
* white-list of permitted HTML elements
* @return safe HTML
* @see Cleaner#clean(Document)
*/
public static String clean(String bodyHtml, Whitelist whitelist) {
return clean(bodyHtml, "", whitelist);
}

/**
 * Check whether the input HTML contains only tags and attributes that the
 * given Whitelist allows. Handy for form validation. Even when this returns
 * true, the input should still be passed through the cleaner so that
 * enforced attributes are set up and the output is tidied.
 *
 * @param bodyHtml
 *            HTML to test
 * @param whitelist
 *            whitelist to test against
 * @return true if no tags or attributes were removed; false otherwise
 * @see #clean(String, org.jsoup.safety.Whitelist)
 */
public static boolean isValid(String bodyHtml, Whitelist whitelist) {
    // parse as a body fragment with an empty base URI, then let the
    // cleaner decide
    Document parsedFragment = parseBodyFragment(bodyHtml, "");
    return new Cleaner(whitelist).isValid(parsedFragment);
}
}

+ 41
- 22
server/src/org/jsoup/examples/HtmlToPlainText.java View File

@@ -1,5 +1,7 @@
package org.jsoup.examples;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
@@ -10,15 +12,15 @@ import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import java.io.IOException;

/**
* HTML to plain-text. This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted
* plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a
* scrape.
* HTML to plain-text. This example program demonstrates the use of jsoup to
* convert HTML input to lightly-formatted plain-text. That is divergent from
* the general goal of jsoup's .text() methods, which is to get clean data from
* a scrape.
* <p/>
* Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend.
*
* Note that this is a fairly simplistic formatter -- for real world use you'll
* want to embrace and extend.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class HtmlToPlainText {
@@ -36,13 +38,16 @@ public class HtmlToPlainText {

/**
* Format an Element to plain-text
* @param element the root element to format
*
* @param element
* the root element to format
* @return formatted text
*/
public String getPlainText(Element element) {
FormattingVisitor formatter = new FormattingVisitor();
NodeTraversor traversor = new NodeTraversor(formatter);
traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node
traversor.traverse(element); // walk the DOM, and call .head() and
// .tail() for each node

return formatter.toString();
}
@@ -51,44 +56,57 @@ public class HtmlToPlainText {
private class FormattingVisitor implements NodeVisitor {
private static final int maxWidth = 80;
private int width = 0;
private StringBuilder accum = new StringBuilder(); // holds the accumulated text
private StringBuilder accum = new StringBuilder(); // holds the
// accumulated text

// hit when the node is first seen
@Override
public void head(Node node, int depth) {
String name = node.nodeName();
if (node instanceof TextNode)
append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM.
else if (name.equals("li"))
if (node instanceof TextNode) {
append(((TextNode) node).text()); // TextNodes carry all
// user-readable text in the
// DOM.
} else if (name.equals("li")) {
append("\n * ");
}
}

// hit when all of the node's children (if any) have been visited
@Override
public void tail(Node node, int depth) {
String name = node.nodeName();
if (name.equals("br"))
if (name.equals("br")) {
append("\n");
else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5"))
} else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5")) {
append("\n\n");
else if (name.equals("a"))
} else if (name.equals("a")) {
append(String.format(" <%s>", node.absUrl("href")));
}
}

// appends text to the string builder with a simple word wrap method
private void append(String text) {
if (text.startsWith("\n"))
width = 0; // reset counter if starts with a newline. only from formats above, not in natural text
if (text.equals(" ") &&
(accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n")))
if (text.startsWith("\n")) {
width = 0; // reset counter if starts with a newline. only from
// formats above, not in natural text
}
if (text.equals(" ")
&& (accum.length() == 0 || StringUtil.in(
accum.substring(accum.length() - 1), " ", "\n"))) {
return; // don't accumulate long runs of empty spaces
}

if (text.length() + width > maxWidth) { // won't fit, needs to wrap
String words[] = text.split("\\s+");
for (int i = 0; i < words.length; i++) {
String word = words[i];
boolean last = i == words.length - 1;
if (!last) // insert a space if not the last word
if (!last) {
word = word + " ";
if (word.length() + width > maxWidth) { // wrap and reset counter
}
if (word.length() + width > maxWidth) { // wrap and reset
// counter
accum.append("\n").append(word);
width = word.length();
} else {
@@ -102,6 +120,7 @@ public class HtmlToPlainText {
}
}

@Override
public String toString() {
return accum.toString();
}

+ 16
- 12
server/src/org/jsoup/examples/ListLinks.java View File

@@ -1,13 +1,13 @@
package org.jsoup.examples;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

/**
* Example program to list links from a URL.
*/
@@ -24,22 +24,25 @@ public class ListLinks {

print("\nMedia: (%d)", media.size());
for (Element src : media) {
if (src.tagName().equals("img"))
print(" * %s: <%s> %sx%s (%s)",
src.tagName(), src.attr("abs:src"), src.attr("width"), src.attr("height"),
trim(src.attr("alt"), 20));
else
if (src.tagName().equals("img")) {
print(" * %s: <%s> %sx%s (%s)", src.tagName(),
src.attr("abs:src"), src.attr("width"),
src.attr("height"), trim(src.attr("alt"), 20));
} else {
print(" * %s: <%s>", src.tagName(), src.attr("abs:src"));
}
}

print("\nImports: (%d)", imports.size());
for (Element link : imports) {
print(" * %s <%s> (%s)", link.tagName(),link.attr("abs:href"), link.attr("rel"));
print(" * %s <%s> (%s)", link.tagName(), link.attr("abs:href"),
link.attr("rel"));
}

print("\nLinks: (%d)", links.size());
for (Element link : links) {
print(" * a: <%s> (%s)", link.attr("abs:href"), trim(link.text(), 35));
print(" * a: <%s> (%s)", link.attr("abs:href"),
trim(link.text(), 35));
}
}

@@ -48,9 +51,10 @@ public class ListLinks {
}

private static String trim(String s, int width) {
if (s.length() > width)
return s.substring(0, width-1) + ".";
else
if (s.length() > width) {
return s.substring(0, width - 1) + ".";
} else {
return s;
}
}
}

+ 100
- 49
server/src/org/jsoup/helper/DataUtil.java View File

@@ -1,102 +1,147 @@
package org.jsoup.helper;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import java.io.*;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;

/**
* Internal static utilities for handling data.
*
*
*/
public class DataUtil {
private static final Pattern charsetPattern = Pattern.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
static final String defaultCharset = "UTF-8"; // used if not found in header or meta charset
private static final Pattern charsetPattern = Pattern
.compile("(?i)\\bcharset=\\s*\"?([^\\s;\"]*)");
static final String defaultCharset = "UTF-8"; // used if not found in header
// or meta charset
private static final int bufferSize = 0x20000; // ~130K.

private DataUtil() {}
private DataUtil() {
}

/**
* Loads a file to a Document.
* @param in file to load
* @param charsetName character set of input
* @param baseUri base URI of document, to resolve relative links against
*
* @param in
* file to load
* @param charsetName
* character set of input
* @param baseUri
* base URI of document, to resolve relative links against
* @return Document
* @throws IOException on IO error
* @throws IOException
* on IO error
*/
public static Document load(File in, String charsetName, String baseUri) throws IOException {
public static Document load(File in, String charsetName, String baseUri)
throws IOException {
FileInputStream inStream = null;
try {
inStream = new FileInputStream(in);
ByteBuffer byteData = readToByteBuffer(inStream);
return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
return parseByteData(byteData, charsetName, baseUri,
Parser.htmlParser());
} finally {
if (inStream != null)
if (inStream != null) {
inStream.close();
}
}
}

/**
* Parses a Document from an input stream.
* @param in input stream to parse. You will need to close it.
* @param charsetName character set of input
* @param baseUri base URI of document, to resolve relative links against
*
* @param in
* input stream to parse. You will need to close it.
* @param charsetName
* character set of input
* @param baseUri
* base URI of document, to resolve relative links against
* @return Document
* @throws IOException on IO error
* @throws IOException
* on IO error
*/
public static Document load(InputStream in, String charsetName, String baseUri) throws IOException {
public static Document load(InputStream in, String charsetName,
String baseUri) throws IOException {
ByteBuffer byteData = readToByteBuffer(in);
return parseByteData(byteData, charsetName, baseUri, Parser.htmlParser());
return parseByteData(byteData, charsetName, baseUri,
Parser.htmlParser());
}

/**
* Parses a Document from an input stream, using the provided Parser.
* @param in input stream to parse. You will need to close it.
* @param charsetName character set of input
* @param baseUri base URI of document, to resolve relative links against
* @param parser alternate {@link Parser#xmlParser() parser} to use.
*
* @param in
* input stream to parse. You will need to close it.
* @param charsetName
* character set of input
* @param baseUri
* base URI of document, to resolve relative links against
* @param parser
* alternate {@link Parser#xmlParser() parser} to use.
* @return Document
* @throws IOException on IO error
* @throws IOException
* on IO error
*/
public static Document load(InputStream in, String charsetName, String baseUri, Parser parser) throws IOException {
public static Document load(InputStream in, String charsetName,
String baseUri, Parser parser) throws IOException {
ByteBuffer byteData = readToByteBuffer(in);
return parseByteData(byteData, charsetName, baseUri, parser);
}

// reads bytes first into a buffer, then decodes with the appropriate charset. done this way to support
// switching the charset midstream when a meta http-equiv tag defines the charset.
static Document parseByteData(ByteBuffer byteData, String charsetName, String baseUri, Parser parser) {
// reads bytes first into a buffer, then decodes with the appropriate
// charset. done this way to support
// switching the charset midstream when a meta http-equiv tag defines the
// charset.
static Document parseByteData(ByteBuffer byteData, String charsetName,
String baseUri, Parser parser) {
String docData;
Document doc = null;
if (charsetName == null) { // determine from meta. safe parse as UTF-8
// look for <meta http-equiv="Content-Type" content="text/html;charset=gb2312"> or HTML5 <meta charset="gb2312">
docData = Charset.forName(defaultCharset).decode(byteData).toString();
// look for <meta http-equiv="Content-Type"
// content="text/html;charset=gb2312"> or HTML5 <meta
// charset="gb2312">
docData = Charset.forName(defaultCharset).decode(byteData)
.toString();
doc = parser.parseInput(docData, baseUri);
Element meta = doc.select("meta[http-equiv=content-type], meta[charset]").first();
Element meta = doc.select(
"meta[http-equiv=content-type], meta[charset]").first();
if (meta != null) { // if not found, will keep utf-8 as best attempt
String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta.attr("content")) : meta.attr("charset");
if (foundCharset != null && foundCharset.length() != 0 && !foundCharset.equals(defaultCharset)) { // need to re-decode
String foundCharset = meta.hasAttr("http-equiv") ? getCharsetFromContentType(meta
.attr("content")) : meta.attr("charset");
if (foundCharset != null && foundCharset.length() != 0
&& !foundCharset.equals(defaultCharset)) { // need to
// re-decode
charsetName = foundCharset;
byteData.rewind();
docData = Charset.forName(foundCharset).decode(byteData).toString();
docData = Charset.forName(foundCharset).decode(byteData)
.toString();
doc = null;
}
}
} else { // specified by content type header (or by user on file load)
Validate.notEmpty(charsetName, "Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
Validate.notEmpty(
charsetName,
"Must set charset arg to character set of file to parse. Set to null to attempt to detect from HTML");
docData = Charset.forName(charsetName).decode(byteData).toString();
}
if (doc == null) {
// there are times where there is a spurious byte-order-mark at the start of the text. Shouldn't be present
// in utf-8. If after decoding, there is a BOM, strip it; otherwise will cause the parser to go straight
// there are times where there is a spurious byte-order-mark at the
// start of the text. Shouldn't be present
// in utf-8. If after decoding, there is a BOM, strip it; otherwise
// will cause the parser to go straight
// into head mode
if (docData.charAt(0) == 65279)
if (docData.charAt(0) == 65279) {
docData = docData.substring(1);
}

doc = parser.parseInput(docData, baseUri);
doc.outputSettings().charset(charsetName);
@@ -108,9 +153,11 @@ public class DataUtil {
byte[] buffer = new byte[bufferSize];
ByteArrayOutputStream outStream = new ByteArrayOutputStream(bufferSize);
int read;
while(true) {
read = inStream.read(buffer);
if (read == -1) break;
while (true) {
read = inStream.read(buffer);
if (read == -1) {
break;
}
outStream.write(buffer, 0, read);
}
ByteBuffer byteData = ByteBuffer.wrap(outStream.toByteArray());
@@ -119,17 +166,21 @@ public class DataUtil {

/**
 * Parse out a charset from a content type header.
 *
 * @param contentType
 *            e.g. "text/html; charset=EUC-JP"; may be null
 * @return "EUC-JP", or null if the input is null or no charset is found.
 *         Charset is trimmed and uppercased.
 */
static String getCharsetFromContentType(String contentType) {
    if (contentType == null) {
        return null;
    }
    Matcher m = charsetPattern.matcher(contentType);
    if (m.find()) {
        // Upper-case with a fixed locale. With the default locale a
        // Turkish environment maps 'i' to a dotted capital I, turning
        // e.g. "iso-8859-1" into a name that is not a valid charset.
        return m.group(1).trim().toUpperCase(java.util.Locale.ENGLISH);
    }
    return null;
}

}

+ 21
- 4
server/src/org/jsoup/helper/DescendableLinkedList.java View File

@@ -5,7 +5,8 @@ import java.util.LinkedList;
import java.util.ListIterator;

/**
* Provides a descending iterator and other 1.6 methods to allow support on the 1.5 JRE.
* Provides a descending iterator and other 1.6 methods to allow support on the
* 1.5 JRE.
*/
public class DescendableLinkedList<E> extends LinkedList<E> {

@@ -18,32 +19,43 @@ public class DescendableLinkedList<E> extends LinkedList<E> {

/**
* Add a new element to the start of the list.
* @param e element to add
*
* @param e
* element to add
*/
@Override
public void push(E e) {
addFirst(e);
}

/**
 * Peek at the tail of the list without removing anything.
 *
 * @return the last element, or null when the list holds no elements
 */
@Override
public E peekLast() {
    if (size() == 0) {
        return null;
    }
    return getLast();
}

/**
 * Take the tail element off the list and hand it back.
 *
 * @return the removed last element, or null when the list holds no
 *         elements
 */
@Override
public E pollLast() {
    if (size() == 0) {
        return null;
    }
    return removeLast();
}

/**
* Get an iterator that starts at the end of the list and works towards the start.
* @return an iterator that starts at the end of the list and works towards the start.
* Get an iterator that starts at the end of the list and works towards the
* start.
*
* @return an iterator that starts at the end of the list and works towards
* the start.
*/
@Override
public Iterator<E> descendingIterator() {
return new DescendingIterator<E>(size());
}
@@ -58,16 +70,20 @@ public class DescendableLinkedList<E> extends LinkedList<E> {

/**
* Check if there is another element on the list.
*
* @return if another element
*/
@Override
public boolean hasNext() {
return iter.hasPrevious();
}

/**
* Get the next element.
*
* @return the next element.
*/
@Override
public E next() {
return iter.previous();
}
@@ -75,6 +91,7 @@ public class DescendableLinkedList<E> extends LinkedList<E> {
/**
* Remove the current element.
*/
@Override
public void remove() {
iter.remove();
}

+ 258
- 102
server/src/org/jsoup/helper/HttpConnection.java View File

@@ -1,23 +1,32 @@
package org.jsoup.helper;

import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.parser.TokenQueue;

import java.io.*;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;

import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.parser.TokenQueue;

/**
* Implementation of {@link Connection}.
* @see org.jsoup.Jsoup#connect(String)
*
* @see org.jsoup.Jsoup#connect(String)
*/
public class HttpConnection implements Connection {
public static Connection connect(String url) {
@@ -35,16 +44,18 @@ public class HttpConnection implements Connection {
private Connection.Request req;
private Connection.Response res;

private HttpConnection() {
private HttpConnection() {
req = new Request();
res = new Response();
}

@Override
public Connection url(URL url) {
req.url(url);
return this;
}

@Override
public Connection url(String url) {
Validate.notEmpty(url, "Must supply a valid URL");
try {
@@ -55,48 +66,57 @@ public class HttpConnection implements Connection {
return this;
}

@Override
public Connection userAgent(String userAgent) {
Validate.notNull(userAgent, "User agent must not be null");
req.header("User-Agent", userAgent);
return this;
}

@Override
public Connection timeout(int millis) {
req.timeout(millis);
return this;
}

@Override
public Connection followRedirects(boolean followRedirects) {
req.followRedirects(followRedirects);
return this;
}

@Override
public Connection referrer(String referrer) {
Validate.notNull(referrer, "Referrer must not be null");
req.header("Referer", referrer);
return this;
}

@Override
public Connection method(Method method) {
req.method(method);
return this;
}

@Override
public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
req.ignoreHttpErrors(ignoreHttpErrors);
return this;
}
req.ignoreHttpErrors(ignoreHttpErrors);
return this;
}

@Override
public Connection ignoreContentType(boolean ignoreContentType) {
req.ignoreContentType(ignoreContentType);
return this;
}

@Override
public Connection data(String key, String value) {
req.data(KeyVal.create(key, value));
return this;
}

@Override
public Connection data(Map<String, String> data) {
Validate.notNull(data, "Data map must not be null");
for (Map.Entry<String, String> entry : data.entrySet()) {
@@ -105,12 +125,14 @@ public class HttpConnection implements Connection {
return this;
}

@Override
public Connection data(String... keyvals) {
Validate.notNull(keyvals, "Data key value pairs must not be null");
Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
Validate.isTrue(keyvals.length % 2 == 0,
"Must supply an even number of key value pairs");
for (int i = 0; i < keyvals.length; i += 2) {
String key = keyvals[i];
String value = keyvals[i+1];
String value = keyvals[i + 1];
Validate.notEmpty(key, "Data key must not be empty");
Validate.notNull(value, "Data value must not be null");
req.data(KeyVal.create(key, value));
@@ -118,16 +140,19 @@ public class HttpConnection implements Connection {
return this;
}

@Override
public Connection header(String name, String value) {
req.header(name, value);
return this;
}

@Override
public Connection cookie(String name, String value) {
req.cookie(name, value);
return this;
}

@Override
public Connection cookies(Map<String, String> cookies) {
Validate.notNull(cookies, "Cookie map must not be null");
for (Map.Entry<String, String> entry : cookies.entrySet()) {
@@ -136,48 +161,57 @@ public class HttpConnection implements Connection {
return this;
}

@Override
public Connection parser(Parser parser) {
req.parser(parser);
return this;
}

@Override
public Document get() throws IOException {
req.method(Method.GET);
execute();
return res.parse();
}

@Override
public Document post() throws IOException {
req.method(Method.POST);
execute();
return res.parse();
}

@Override
public Connection.Response execute() throws IOException {
res = Response.execute(req);
return res;
}

@Override
public Connection.Request request() {
return req;
}

@Override
public Connection request(Connection.Request request) {
req = request;
return this;
}

@Override
public Connection.Response response() {
return res;
}

@Override
public Connection response(Connection.Response response) {
res = response;
return this;
}

@SuppressWarnings({"unchecked"})
private static abstract class Base<T extends Connection.Base> implements Connection.Base<T> {
@SuppressWarnings({ "unchecked" })
private static abstract class Base<T extends Connection.Base> implements
Connection.Base<T> {
URL url;
Method method;
Map<String, String> headers;
@@ -188,66 +222,83 @@ public class HttpConnection implements Connection {
cookies = new LinkedHashMap<String, String>();
}

@Override
public URL url() {
return url;
}

@Override
public T url(URL url) {
Validate.notNull(url, "URL must not be null");
this.url = url;
return (T) this;
}

@Override
public Method method() {
return method;
}

@Override
public T method(Method method) {
Validate.notNull(method, "Method must not be null");
this.method = method;
return (T) this;
}

@Override
public String header(String name) {
Validate.notNull(name, "Header name must not be null");
return getHeaderCaseInsensitive(name);
}

@Override
public T header(String name, String value) {
Validate.notEmpty(name, "Header name must not be empty");
Validate.notNull(value, "Header value must not be null");
removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
removeHeader(name); // ensures we don't get an "accept-encoding" and
// a "Accept-Encoding"
headers.put(name, value);
return (T) this;
}

@Override
public boolean hasHeader(String name) {
Validate.notEmpty(name, "Header name must not be empty");
return getHeaderCaseInsensitive(name) != null;
}

@Override
public T removeHeader(String name) {
Validate.notEmpty(name, "Header name must not be empty");
Map.Entry<String, String> entry = scanHeaders(name); // remove is case insensitive too
if (entry != null)
Map.Entry<String, String> entry = scanHeaders(name); // remove is
// case
// insensitive
// too
if (entry != null) {
headers.remove(entry.getKey()); // ensures correct case
}
return (T) this;
}

@Override
public Map<String, String> headers() {
return headers;
}

private String getHeaderCaseInsensitive(String name) {
Validate.notNull(name, "Header name must not be null");
// quick evals for common case of title case, lower case, then scan for mixed
// quick evals for common case of title case, lower case, then scan
// for mixed
String value = headers.get(name);
if (value == null)
if (value == null) {
value = headers.get(name.toLowerCase());
}
if (value == null) {
Map.Entry<String, String> entry = scanHeaders(name);
if (entry != null)
if (entry != null) {
value = entry.getValue();
}
}
return value;
}
@@ -255,17 +306,20 @@ public class HttpConnection implements Connection {
private Map.Entry<String, String> scanHeaders(String name) {
String lc = name.toLowerCase();
for (Map.Entry<String, String> entry : headers.entrySet()) {
if (entry.getKey().toLowerCase().equals(lc))
if (entry.getKey().toLowerCase().equals(lc)) {
return entry;
}
}
return null;
}

@Override
public String cookie(String name) {
Validate.notNull(name, "Cookie name must not be null");
return cookies.get(name);
}

@Override
public T cookie(String name, String value) {
Validate.notEmpty(name, "Cookie name must not be empty");
Validate.notNull(value, "Cookie value must not be null");
@@ -273,23 +327,27 @@ public class HttpConnection implements Connection {
return (T) this;
}

/**
 * Check whether a cookie with the given name is present.
 *
 * @param name
 *            cookie name; must not be empty
 * @return true if the cookie map contains an entry for {@code name}
 */
@Override
public boolean hasCookie(String name) {
    // Validate the name itself. Passing only the message literal made
    // the literal the value under test, so the check could never fail.
    Validate.notEmpty(name, "Cookie name must not be empty");
    return cookies.containsKey(name);
}

/**
 * Remove the cookie with the given name, if it is present.
 *
 * @param name
 *            cookie name; must not be empty
 * @return this, for chaining
 */
@Override
public T removeCookie(String name) {
    // Validate the name itself. Passing only the message literal made
    // the literal the value under test, so the check could never fail.
    Validate.notEmpty(name, "Cookie name must not be empty");
    cookies.remove(name);
    return (T) this;
}

@Override
public Map<String, String> cookies() {
return cookies;
}
}

public static class Request extends Base<Connection.Request> implements Connection.Request {
public static class Request extends Base<Connection.Request> implements
Connection.Request {
private int timeoutMilliseconds;
private boolean followRedirects;
private Collection<Connection.KeyVal> data;
@@ -297,7 +355,7 @@ public class HttpConnection implements Connection {
private boolean ignoreContentType = false;
private Parser parser;

private Request() {
private Request() {
timeoutMilliseconds = 3000;
followRedirects = true;
data = new ArrayList<Connection.KeyVal>();
@@ -306,64 +364,78 @@ public class HttpConnection implements Connection {
parser = Parser.htmlParser();
}

@Override
public int timeout() {
return timeoutMilliseconds;
}

@Override
public Request timeout(int millis) {
Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
Validate.isTrue(millis >= 0,
"Timeout milliseconds must be 0 (infinite) or greater");
timeoutMilliseconds = millis;
return this;
}

@Override
public boolean followRedirects() {
return followRedirects;
}

@Override
public Connection.Request followRedirects(boolean followRedirects) {
this.followRedirects = followRedirects;
return this;
}

@Override
public boolean ignoreHttpErrors() {
return ignoreHttpErrors;
}

@Override
public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
this.ignoreHttpErrors = ignoreHttpErrors;
return this;
}

@Override
public boolean ignoreContentType() {
return ignoreContentType;
}

@Override
public Connection.Request ignoreContentType(boolean ignoreContentType) {
this.ignoreContentType = ignoreContentType;
return this;
}

@Override
public Request data(Connection.KeyVal keyval) {
Validate.notNull(keyval, "Key val must not be null");
data.add(keyval);
return this;
}

@Override
public Collection<Connection.KeyVal> data() {
return data;
}

@Override
public Request parser(Parser parser) {
this.parser = parser;
return this;
}

@Override
public Parser parser() {
return parser;
}
}

public static class Response extends Base<Connection.Response> implements Connection.Response {
public static class Response extends Base<Connection.Response> implements
Connection.Response {
private static final int MAX_REDIRECTS = 20;
private int statusCode;
private String statusMessage;
@@ -382,44 +454,65 @@ public class HttpConnection implements Connection {
super();
if (previousResponse != null) {
numRedirects = previousResponse.numRedirects + 1;
if (numRedirects >= MAX_REDIRECTS)
throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
if (numRedirects >= MAX_REDIRECTS) {
throw new IOException(
String.format(
"Too many redirects occurred trying to load URL %s",
previousResponse.url()));
}
}
}
static Response execute(Connection.Request req) throws IOException {
return execute(req, null);
}

static Response execute(Connection.Request req, Response previousResponse) throws IOException {
static Response execute(Connection.Request req,
Response previousResponse) throws IOException {
Validate.notNull(req, "Request must not be null");
String protocol = req.url().getProtocol();
Validate
.isTrue(protocol.equals("http") || protocol.equals("https"), "Only http & https protocols supported");
Validate.isTrue(
protocol.equals("http") || protocol.equals("https"),
"Only http & https protocols supported");

// set up the request for execution
if (req.method() == Connection.Method.GET && req.data().size() > 0)
if (req.method() == Connection.Method.GET && req.data().size() > 0) {
serialiseRequestUrl(req); // appends query string
}
HttpURLConnection conn = createConnection(req);
conn.connect();
if (req.method() == Connection.Method.POST)
writePost(req.data(), conn.getOutputStream());
if (req.method() == Connection.Method.POST) {
writePost(req.data(), conn.getOutputStream());
}

int status = conn.getResponseCode();
boolean needsRedirect = false;
if (status != HttpURLConnection.HTTP_OK) {
if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER)
if (status == HttpURLConnection.HTTP_MOVED_TEMP
|| status == HttpURLConnection.HTTP_MOVED_PERM
|| status == HttpURLConnection.HTTP_SEE_OTHER) {
needsRedirect = true;
else if (!req.ignoreHttpErrors())
throw new IOException(status + " error loading URL " + req.url().toString());
} else if (!req.ignoreHttpErrors()) {
throw new IOException(status + " error loading URL "
+ req.url().toString());
}
}
Response res = new Response(previousResponse);
res.setupFromConnection(conn, previousResponse);
if (needsRedirect && req.followRedirects()) {
req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
req.method(Method.GET); // always redirect with a get. any data
// param from original req are dropped.
req.data().clear();
req.url(new URL(req.url(), res.header("Location")));
for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add response cookies to request (for e.g. login posts)
for (Map.Entry<String, String> cookie : res.cookies.entrySet()) { // add
// response
// cookies
// to
// request
// (for
// e.g.
// login
// posts)
req.cookie(cookie.getKey(), cookie.getValue());
}
return execute(req, res);
@@ -429,77 +522,120 @@ public class HttpConnection implements Connection {
InputStream bodyStream = null;
InputStream dataStream = null;
try {
dataStream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
bodyStream = res.hasHeader("Content-Encoding") && res.header("Content-Encoding").equalsIgnoreCase("gzip") ?
new BufferedInputStream(new GZIPInputStream(dataStream)) :
new BufferedInputStream(dataStream);
dataStream = conn.getErrorStream() != null ? conn
.getErrorStream() : conn.getInputStream();
bodyStream = res.hasHeader("Content-Encoding")
&& res.header("Content-Encoding").equalsIgnoreCase(
"gzip") ? new BufferedInputStream(
new GZIPInputStream(dataStream))
: new BufferedInputStream(dataStream);

res.byteData = DataUtil.readToByteBuffer(bodyStream);
res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
res.charset = DataUtil
.getCharsetFromContentType(res.contentType); // may be
// null,
// readInputStream
// deals
// with it
} finally {
if (bodyStream != null) bodyStream.close();
if (dataStream != null) dataStream.close();
if (bodyStream != null) {
bodyStream.close();
}
if (dataStream != null) {
dataStream.close();
}
}

res.executed = true;
return res;
}

@Override
public int statusCode() {
return statusCode;
}

@Override
public String statusMessage() {
return statusMessage;
}

@Override
public String charset() {
return charset;
}

@Override
public String contentType() {
return contentType;
}

@Override
public Document parse() throws IOException {
Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
if (!req.ignoreContentType() && (contentType == null || !(contentType.startsWith("text/") || contentType.startsWith("application/xml") || contentType.startsWith("application/xhtml+xml"))))
throw new IOException(String.format("Unhandled content type \"%s\" on URL %s. Must be text/*, application/xml, or application/xhtml+xml",
contentType, url.toString()));
Document doc = DataUtil.parseByteData(byteData, charset, url.toExternalForm(), req.parser());
Validate.isTrue(
executed,
"Request must be executed (with .execute(), .get(), or .post() before parsing response");
if (!req.ignoreContentType()
&& (contentType == null || !(contentType
.startsWith("text/")
|| contentType.startsWith("application/xml") || contentType
.startsWith("application/xhtml+xml")))) {
throw new IOException(
String.format(
"Unhandled content type \"%s\" on URL %s. Must be text/*, application/xml, or application/xhtml+xml",
contentType, url.toString()));
}
Document doc = DataUtil.parseByteData(byteData, charset,
url.toExternalForm(), req.parser());
byteData.rewind();
charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
charset = doc.outputSettings().charset().name(); // update charset
// from meta-equiv,
// possibly
return doc;
}

@Override
public String body() {
Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
// charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
Validate.isTrue(
executed,
"Request must be executed (with .execute(), .get(), or .post() before getting response body");
// charset gets set from header on execute, and from meta-equiv on
// parse. parse may not have happened yet
String body;
if (charset == null)
body = Charset.forName(DataUtil.defaultCharset).decode(byteData).toString();
else
if (charset == null) {
body = Charset.forName(DataUtil.defaultCharset)
.decode(byteData).toString();
} else {
body = Charset.forName(charset).decode(byteData).toString();
}
byteData.rewind();
return body;
}

@Override
public byte[] bodyAsBytes() {
Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
Validate.isTrue(
executed,
"Request must be executed (with .execute(), .get(), or .post() before getting response body");
return byteData.array();
}

// set up connection defaults, and details from request
private static HttpURLConnection createConnection(Connection.Request req) throws IOException {
HttpURLConnection conn = (HttpURLConnection) req.url().openConnection();
private static HttpURLConnection createConnection(Connection.Request req)
throws IOException {
HttpURLConnection conn = (HttpURLConnection) req.url()
.openConnection();
conn.setRequestMethod(req.method().name());
conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
conn.setInstanceFollowRedirects(false); // don't rely on native
// redirection support
conn.setConnectTimeout(req.timeout());
conn.setReadTimeout(req.timeout());
if (req.method() == Method.POST)
if (req.method() == Method.POST) {
conn.setDoOutput(true);
if (req.cookies().size() > 0)
}
if (req.cookies().size() > 0) {
conn.addRequestProperty("Cookie", getRequestCookieString(req));
}
for (Map.Entry<String, String> header : req.headers().entrySet()) {
conn.addRequestProperty(header.getKey(), header.getValue());
}
@@ -507,7 +643,8 @@ public class HttpConnection implements Connection {
}

// set up url, method, header, cookies
private void setupFromConnection(HttpURLConnection conn, Connection.Response previousResponse) throws IOException {
private void setupFromConnection(HttpURLConnection conn,
Connection.Response previousResponse) throws IOException {
method = Connection.Method.valueOf(conn.getRequestMethod());
url = conn.getURL();
statusCode = conn.getResponseCode();
@@ -517,11 +654,14 @@ public class HttpConnection implements Connection {
Map<String, List<String>> resHeaders = conn.getHeaderFields();
processResponseHeaders(resHeaders);

// if from a redirect, map previous response cookies into this response
// if from a redirect, map previous response cookies into this
// response
if (previousResponse != null) {
for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
if (!hasCookie(prevCookie.getKey()))
for (Map.Entry<String, String> prevCookie : previousResponse
.cookies().entrySet()) {
if (!hasCookie(prevCookie.getKey())) {
cookie(prevCookie.getKey(), prevCookie.getValue());
}
}
}
}
@@ -529,86 +669,98 @@ public class HttpConnection implements Connection {
void processResponseHeaders(Map<String, List<String>> resHeaders) {
for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
String name = entry.getKey();
if (name == null)
if (name == null) {
continue; // http/1.1 line
}

List<String> values = entry.getValue();
if (name.equalsIgnoreCase("Set-Cookie")) {
for (String value : values) {
if (value == null)
if (value == null) {
continue;
}
TokenQueue cd = new TokenQueue(value);
String cookieName = cd.chompTo("=").trim();
String cookieVal = cd.consumeTo(";").trim();
if (cookieVal == null)
if (cookieVal == null) {
cookieVal = "";
}
// ignores path, date, domain, secure et al. req'd?
// name not blank, value not null
if (cookieName != null && cookieName.length() > 0)
if (cookieName != null && cookieName.length() > 0) {
cookie(cookieName, cookieVal);
}
}
} else { // only take the first instance of each header
if (!values.isEmpty())
if (!values.isEmpty()) {
header(name, values.get(0));
}
}
}
}

private static void writePost(Collection<Connection.KeyVal> data, OutputStream outputStream) throws IOException {
OutputStreamWriter w = new OutputStreamWriter(outputStream, DataUtil.defaultCharset);
private static void writePost(Collection<Connection.KeyVal> data,
OutputStream outputStream) throws IOException {
OutputStreamWriter w = new OutputStreamWriter(outputStream,
DataUtil.defaultCharset);
boolean first = true;
for (Connection.KeyVal keyVal : data) {
if (!first)
if (!first) {
w.append('&');
else
} else {
first = false;
}

w.write(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset));
w.write('=');
w.write(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
w.write(URLEncoder.encode(keyVal.value(),
DataUtil.defaultCharset));
}
w.close();
}
private static String getRequestCookieString(Connection.Request req) {
StringBuilder sb = new StringBuilder();
boolean first = true;
for (Map.Entry<String, String> cookie : req.cookies().entrySet()) {
if (!first)
if (!first) {
sb.append("; ");
else
} else {
first = false;
sb.append(cookie.getKey()).append('=').append(cookie.getValue());
// todo: spec says only ascii, no escaping / encoding defined. validate on set? or escape somehow here?
}
sb.append(cookie.getKey()).append('=')
.append(cookie.getValue());
// todo: spec says only ascii, no escaping / encoding defined.
// validate on set? or escape somehow here?
}
return sb.toString();
}

// for get url reqs, serialise the data map into the url
private static void serialiseRequestUrl(Connection.Request req) throws IOException {
private static void serialiseRequestUrl(Connection.Request req)
throws IOException {
URL in = req.url();
StringBuilder url = new StringBuilder();
boolean first = true;
// reconstitute the query, ready for appends
url
.append(in.getProtocol())
.append("://")
.append(in.getAuthority()) // includes host, port
.append(in.getPath())
.append("?");
url.append(in.getProtocol()).append("://")
.append(in.getAuthority()) // includes host, port
.append(in.getPath()).append("?");
if (in.getQuery() != null) {
url.append(in.getQuery());
first = false;
}
for (Connection.KeyVal keyVal : req.data()) {
if (!first)
if (!first) {
url.append('&');
else
} else {
first = false;
url
.append(URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
.append('=')
.append(URLEncoder.encode(keyVal.value(), DataUtil.defaultCharset));
}
url.append(
URLEncoder.encode(keyVal.key(), DataUtil.defaultCharset))
.append('=')
.append(URLEncoder.encode(keyVal.value(),
DataUtil.defaultCharset));
}
req.url(new URL(url.toString()));
req.data().clear(); // moved into url as get params
@@ -630,22 +782,26 @@ public class HttpConnection implements Connection {
this.value = value;
}

@Override
public KeyVal key(String key) {
Validate.notEmpty(key, "Data key must not be empty");
this.key = key;
return this;
}

@Override
public String key() {
return key;
}

@Override
public KeyVal value(String value) {
Validate.notNull(value, "Data value must not be null");
this.value = value;
return this;
}

@Override
public String value() {
return value;
}
@@ -653,6 +809,6 @@ public class HttpConnection implements Connection {
@Override
public String toString() {
return key + "=" + value;
}
}
}
}

+ 53
- 26
server/src/org/jsoup/helper/StringUtil.java View File

@@ -8,12 +8,16 @@ import java.util.Iterator;
*/
public final class StringUtil {
// memoised padding up to 10
private static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "};
private static final String[] padding = { "", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " " };

/**
* Join a collection of strings by a seperator
* @param strings collection of string objects
* @param sep string to place between strings
*
* @param strings
* collection of string objects
* @param sep
* string to place between strings
* @return joined string
*/
public static String join(Collection strings, String sep) {
@@ -22,17 +26,22 @@ public final class StringUtil {

/**
* Join a collection of strings by a seperator
* @param strings iterator of string objects
* @param sep string to place between strings
*
* @param strings
* iterator of string objects
* @param sep
* string to place between strings
* @return joined string
*/
public static String join(Iterator strings, String sep) {
if (!strings.hasNext())
if (!strings.hasNext()) {
return "";
}

String start = strings.next().toString();
if (!strings.hasNext()) // only one, avoid builder
if (!strings.hasNext()) {
return start;
}

StringBuilder sb = new StringBuilder(64).append(start);
while (strings.hasNext()) {
@@ -44,62 +53,79 @@ public final class StringUtil {

/**
* Returns space padding
* @param width amount of padding desired
*
* @param width
* amount of padding desired
* @return string of spaces * width
*/
public static String padding(int width) {
if (width < 0)
if (width < 0) {
throw new IllegalArgumentException("width must be > 0");
}

if (width < padding.length)
if (width < padding.length) {
return padding[width];
}

char[] out = new char[width];
for (int i = 0; i < width; i++)
for (int i = 0; i < width; i++) {
out[i] = ' ';
}
return String.valueOf(out);
}

/**
* Tests if a string is blank: null, emtpy, or only whitespace (" ", \r\n, \t, etc)
* @param string string to test
* Tests if a string is blank: null, emtpy, or only whitespace (" ", \r\n,
* \t, etc)
*
* @param string
* string to test
* @return if string is blank
*/
public static boolean isBlank(String string) {
if (string == null || string.length() == 0)
if (string == null || string.length() == 0) {
return true;
}

int l = string.length();
for (int i = 0; i < l; i++) {
if (!StringUtil.isWhitespace(string.codePointAt(i)))
if (!StringUtil.isWhitespace(string.codePointAt(i))) {
return false;
}
}
return true;
}

/**
* Tests if a string is numeric, i.e. contains only digit characters
* @param string string to test
* @return true if only digit chars, false if empty or null or contains non-digit chrs
*
* @param string
* string to test
* @return true if only digit chars, false if empty or null or contains
* non-digit chrs
*/
public static boolean isNumeric(String string) {
if (string == null || string.length() == 0)
if (string == null || string.length() == 0) {
return false;
}

int l = string.length();
for (int i = 0; i < l; i++) {
if (!Character.isDigit(string.codePointAt(i)))
if (!Character.isDigit(string.codePointAt(i))) {
return false;
}
}
return true;
}

/**
* Tests if a code point is "whitespace" as defined in the HTML spec.
* @param c code point to test
*
* @param c
* code point to test
* @return true if code point is whitespace, false otherwise
*/
public static boolean isWhitespace(int c){
public static boolean isWhitespace(int c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
}

@@ -117,12 +143,12 @@ public final class StringUtil {
modified = true;
continue;
}
if (c != ' ')
if (c != ' ') {
modified = true;
}
sb.append(' ');
lastWasWhite = true;
}
else {
} else {
sb.appendCodePoint(c);
lastWasWhite = false;
}
@@ -132,8 +158,9 @@ public final class StringUtil {

public static boolean in(String needle, String... haystack) {
for (String hay : haystack) {
if (hay.equals(needle))
return true;
if (hay.equals(needle)) {
return true;
}
}
return false;
}

+ 67
- 29
server/src/org/jsoup/helper/Validate.java View File

@@ -4,69 +4,93 @@ package org.jsoup.helper;
* Simple validation methods. Designed for jsoup internal use
*/
public final class Validate {
private Validate() {}

private Validate() {
}

/**
* Validates that the object is not null
* @param obj object to test
*
* @param obj
* object to test
*/
public static void notNull(Object obj) {
if (obj == null)
if (obj == null) {
throw new IllegalArgumentException("Object must not be null");
}
}

/**
* Validates that the object is not null
* @param obj object to test
* @param msg message to output if validation fails
*
* @param obj
* object to test
* @param msg
* message to output if validation fails
*/
public static void notNull(Object obj, String msg) {
if (obj == null)
if (obj == null) {
throw new IllegalArgumentException(msg);
}
}

/**
* Validates that the value is true
* @param val object to test
*
* @param val
* object to test
*/
public static void isTrue(boolean val) {
if (!val)
if (!val) {
throw new IllegalArgumentException("Must be true");
}
}

/**
* Validates that the value is true
* @param val object to test
* @param msg message to output if validation fails
*
* @param val
* object to test
* @param msg
* message to output if validation fails
*/
public static void isTrue(boolean val, String msg) {
if (!val)
if (!val) {
throw new IllegalArgumentException(msg);
}
}

/**
* Validates that the value is false
* @param val object to test
*
* @param val
* object to test
*/
public static void isFalse(boolean val) {
if (val)
if (val) {
throw new IllegalArgumentException("Must be false");
}
}

/**
* Validates that the value is false
* @param val object to test
* @param msg message to output if validation fails
*
* @param val
* object to test
* @param msg
* message to output if validation fails
*/
public static void isFalse(boolean val, String msg) {
if (val)
if (val) {
throw new IllegalArgumentException(msg);
}
}

/**
* Validates that the array contains no null elements
* @param objects the array to test
*
* @param objects
* the array to test
*/
public static void noNullElements(Object[] objects) {
noNullElements(objects, "Array must not contain any null objects");
@@ -74,37 +98,51 @@ public final class Validate {

/**
* Validates that the array contains no null elements
* @param objects the array to test
* @param msg message to output if validation fails
*
* @param objects
* the array to test
* @param msg
* message to output if validation fails
*/
public static void noNullElements(Object[] objects, String msg) {
for (Object obj : objects)
if (obj == null)
for (Object obj : objects) {
if (obj == null) {
throw new IllegalArgumentException(msg);
}
}
}

/**
* Validates that the string is not empty
* @param string the string to test
*
* @param string
* the string to test
*/
public static void notEmpty(String string) {
if (string == null || string.length() == 0)
if (string == null || string.length() == 0) {
throw new IllegalArgumentException("String must not be empty");
}
}

/**
* Validates that the string is not empty
* @param string the string to test
* @param msg message to output if validation fails
*
* @param string
* the string to test
* @param msg
* message to output if validation fails
*/
public static void notEmpty(String string, String msg) {
if (string == null || string.length() == 0)
if (string == null || string.length() == 0) {
throw new IllegalArgumentException(msg);
}
}

/**
Cause a failure.
@param msg message to output.
* Cause a failure.
*
* @param msg
* message to output.
*/
public static void fail(String msg) {
throw new IllegalArgumentException(msg);

+ 73
- 37
server/src/org/jsoup/nodes/Attribute.java View File

@@ -1,21 +1,26 @@
package org.jsoup.nodes;

import org.jsoup.helper.Validate;

import java.util.Map;

/**
A single key + value attribute. Keys are trimmed and normalised to lower-case.
import org.jsoup.helper.Validate;

@author Jonathan Hedley, jonathan@hedley.net */
public class Attribute implements Map.Entry<String, String>, Cloneable {
/**
* A single key + value attribute. Keys are trimmed and normalised to
* lower-case.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Attribute implements Map.Entry<String, String>, Cloneable {
private String key;
private String value;

/**
* Create a new attribute from unencoded (raw) key and value.
* @param key attribute key
* @param value attribute value
*
* @param key
* attribute key
* @param value
* attribute value
* @see #createFromEncoded
*/
public Attribute(String key, String value) {
@@ -26,16 +31,20 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}

/**
Get the attribute key.
@return the attribute key
* Get the attribute key.
*
* @return the attribute key
*/
@Override
public String getKey() {
return key;
}

/**
Set the attribute key. Gets normalised as per the constructor method.
@param key the new key; must not be null
* Set the attribute key. Gets normalised as per the constructor method.
*
* @param key
* the new key; must not be null
*/
public void setKey(String key) {
Validate.notEmpty(key);
@@ -43,17 +52,22 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}

/**
Get the attribute value.
@return the attribute value
* Get the attribute value.
*
* @return the attribute value
*/
@Override
public String getValue() {
return value;
}

/**
Set the attribute value.
@param value the new attribute value; must not be null
* Set the attribute value.
*
* @param value
* the new attribute value; must not be null
*/
@Override
public String setValue(String value) {
Validate.notNull(value);
String old = this.value;
@@ -62,53 +76,73 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
}

/**
Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
@return HTML
* Get the HTML representation of this attribute; e.g.
* {@code href="index.html"}.
*
* @return HTML
*/
public String html() {
return key + "=\"" + Entities.escape(value, (new Document("")).outputSettings()) + "\"";
return key + "=\""
+ Entities.escape(value, (new Document("")).outputSettings())
+ "\"";
}
protected void html(StringBuilder accum, Document.OutputSettings out) {
accum
.append(key)
.append("=\"")
.append(Entities.escape(value, out))
.append("\"");
accum.append(key).append("=\"").append(Entities.escape(value, out))
.append("\"");
}

/**
Get the string representation of this attribute, implemented as {@link #html()}.
@return string
* Get the string representation of this attribute, implemented as
* {@link #html()}.
*
* @return string
*/
@Override
public String toString() {
return html();
}

/**
* Create a new Attribute from an unencoded key and a HTML attribute encoded value.
* @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
* @param encodedValue HTML attribute encoded value
* Create a new Attribute from an unencoded key and a HTML attribute encoded
* value.
*
* @param unencodedKey
* assumes the key is not encoded, as can be only run of simple
* \w chars.
* @param encodedValue
* HTML attribute encoded value
* @return attribute
*/
public static Attribute createFromEncoded(String unencodedKey, String encodedValue) {
public static Attribute createFromEncoded(String unencodedKey,
String encodedValue) {
String value = Entities.unescape(encodedValue, true);
return new Attribute(unencodedKey, value);
}

protected boolean isDataAttribute() {
return key.startsWith(Attributes.dataPrefix) && key.length() > Attributes.dataPrefix.length();
return key.startsWith(Attributes.dataPrefix)
&& key.length() > Attributes.dataPrefix.length();
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Attribute)) return false;
if (this == o) {
return true;
}
if (!(o instanceof Attribute)) {
return false;
}

Attribute attribute = (Attribute) o;

if (key != null ? !key.equals(attribute.key) : attribute.key != null) return false;
if (value != null ? !value.equals(attribute.value) : attribute.value != null) return false;
if (key != null ? !key.equals(attribute.key) : attribute.key != null) {
return false;
}
if (value != null ? !value.equals(attribute.value)
: attribute.value != null) {
return false;
}

return true;
}
@@ -123,7 +157,9 @@ public class Attribute implements Map.Entry<String, String>, Cloneable {
@Override
public Attribute clone() {
try {
return (Attribute) super.clone(); // only fields are immutable strings key and value, so no more deep copy required
return (Attribute) super.clone(); // only fields are immutable
// strings key and value, so no
// more deep copy required
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}

+ 130
- 64
server/src/org/jsoup/nodes/Attributes.java View File

@@ -1,46 +1,63 @@
package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import java.util.AbstractMap;
import java.util.AbstractSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import java.util.*;
import org.jsoup.helper.Validate;

/**
* The attributes of an Element.
* <p/>
* Attributes are treated as a map: there can be only one value associated with an attribute key.
* Attributes are treated as a map: there can be only one value associated with
* an attribute key.
* <p/>
* Attribute key and value comparisons are done case insensitively, and keys are normalised to
* lower-case.
* Attribute key and value comparisons are done case insensitively, and keys are
* normalised to lower-case.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Attributes implements Iterable<Attribute>, Cloneable {
protected static final String dataPrefix = "data-";
private LinkedHashMap<String, Attribute> attributes = null;

// linked hash map to preserve insertion order.
// null be default as so many elements have no attributes -- saves a good chunk of memory
// null be default as so many elements have no attributes -- saves a good
// chunk of memory

/**
Get an attribute value by key.
@param key the attribute key
@return the attribute value if set; or empty string if not set.
@see #hasKey(String)
* Get an attribute value by key.
*
* @param key
* the attribute key
* @return the attribute value if set; or empty string if not set.
* @see #hasKey(String)
*/
public String get(String key) {
Validate.notEmpty(key);

if (attributes == null)
if (attributes == null) {
return "";
}

Attribute attr = attributes.get(key.toLowerCase());
return attr != null ? attr.getValue() : "";
}

/**
Set a new attribute, or replace an existing one by key.
@param key attribute key
@param value attribute value
* Set a new attribute, or replace an existing one by key.
*
* @param key
* attribute key
* @param value
* attribute value
*/
public void put(String key, String value) {
Attribute attr = new Attribute(key, value);
@@ -48,70 +65,88 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}

/**
Set a new attribute, or replace an existing one by key.
@param attribute attribute
* Set a new attribute, or replace an existing one by key.
*
* @param attribute
* attribute
*/
public void put(Attribute attribute) {
Validate.notNull(attribute);
if (attributes == null)
attributes = new LinkedHashMap<String, Attribute>(2);
if (attributes == null) {
attributes = new LinkedHashMap<String, Attribute>(2);
}
attributes.put(attribute.getKey(), attribute);
}

/**
Remove an attribute by key.
@param key attribute key to remove
* Remove an attribute by key.
*
* @param key
* attribute key to remove
*/
public void remove(String key) {
Validate.notEmpty(key);
if (attributes == null)
if (attributes == null) {
return;
}
attributes.remove(key.toLowerCase());
}

/**
Tests if these attributes contain an attribute with this key.
@param key key to check for
@return true if key exists, false otherwise
* Tests if these attributes contain an attribute with this key.
*
* @param key
* key to check for
* @return true if key exists, false otherwise
*/
public boolean hasKey(String key) {
return attributes != null && attributes.containsKey(key.toLowerCase());
}

/**
Get the number of attributes in this set.
@return size
* Get the number of attributes in this set.
*
* @return size
*/
public int size() {
if (attributes == null)
if (attributes == null) {
return 0;
}
return attributes.size();
}

/**
Add all the attributes from the incoming set to this set.
@param incoming attributes to add to these attributes.
* Add all the attributes from the incoming set to this set.
*
* @param incoming
* attributes to add to these attributes.
*/
public void addAll(Attributes incoming) {
if (incoming.size() == 0)
if (incoming.size() == 0) {
return;
if (attributes == null)
}
if (attributes == null) {
attributes = new LinkedHashMap<String, Attribute>(incoming.size());
}
attributes.putAll(incoming.attributes);
}

@Override
public Iterator<Attribute> iterator() {
return asList().iterator();
}

/**
Get the attributes as a List, for iteration. Do not modify the keys of the attributes via this view, as changes
to keys will not be recognised in the containing set.
@return an view of the attributes as a List.
* Get the attributes as a List, for iteration. Do not modify the keys of
* the attributes via this view, as changes to keys will not be recognised
* in the containing set.
*
* @return an view of the attributes as a List.
*/
public List<Attribute> asList() {
if (attributes == null)
if (attributes == null) {
return Collections.emptyList();
}

List<Attribute> list = new ArrayList<Attribute>(attributes.size());
for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
@@ -121,8 +156,9 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}

/**
* Retrieves a filtered view of attributes that are HTML5 custom data attributes; that is, attributes with keys
* starting with {@code data-}.
* Retrieves a filtered view of attributes that are HTML5 custom data
* attributes; that is, attributes with keys starting with {@code data-}.
*
* @return map of custom data attributes.
*/
public Map<String, String> dataset() {
@@ -130,42 +166,54 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
}

/**
Get the HTML representation of these attributes.
@return HTML
* Get the HTML representation of these attributes.
*
* @return HTML
*/
public String html() {
StringBuilder accum = new StringBuilder();
html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used
html(accum, (new Document("")).outputSettings()); // output settings a
// bit funky, but this
// html() seldom used
return accum.toString();
}
void html(StringBuilder accum, Document.OutputSettings out) {
if (attributes == null)
if (attributes == null) {
return;
}

for (Map.Entry<String, Attribute> entry : attributes.entrySet()) {
Attribute attribute = entry.getValue();
accum.append(" ");
attribute.html(accum, out);
}
}

@Override
public String toString() {
return html();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Attributes)) return false;
if (this == o) {
return true;
}
if (!(o instanceof Attributes)) {
return false;
}

Attributes that = (Attributes) o;
if (attributes != null ? !attributes.equals(that.attributes) : that.attributes != null) return false;

if (attributes != null ? !attributes.equals(that.attributes)
: that.attributes != null) {
return false;
}

return true;
}
@Override
public int hashCode() {
return attributes != null ? attributes.hashCode() : 0;
@@ -173,8 +221,9 @@ public class Attributes implements Iterable<Attribute>, Cloneable {

@Override
public Attributes clone() {
if (attributes == null)
if (attributes == null) {
return new Attributes();
}

Attributes clone;
try {
@@ -182,19 +231,23 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
clone.attributes = new LinkedHashMap<String, Attribute>(attributes.size());
for (Attribute attribute: this)
clone.attributes = new LinkedHashMap<String, Attribute>(
attributes.size());
for (Attribute attribute : this) {
clone.attributes.put(attribute.getKey(), attribute.clone());
}
return clone;
}

private class Dataset extends AbstractMap<String, String> {

private Dataset() {
if (attributes == null)
if (attributes == null) {
attributes = new LinkedHashMap<String, Attribute>(2);
}
}

@Override
public Set<Entry<String, String>> entrySet() {
return new EntrySet();
}
@@ -202,41 +255,54 @@ public class Attributes implements Iterable<Attribute>, Cloneable {
@Override
public String put(String key, String value) {
String dataKey = dataKey(key);
String oldValue = hasKey(dataKey) ? attributes.get(dataKey).getValue() : null;
String oldValue = hasKey(dataKey) ? attributes.get(dataKey)
.getValue() : null;
Attribute attr = new Attribute(dataKey, value);
attributes.put(dataKey, attr);
return oldValue;
}

private class EntrySet extends AbstractSet<Map.Entry<String, String>> {
@Override
public Iterator<Map.Entry<String, String>> iterator() {
return new DatasetIterator();
}

@Override
public int size() {
int count = 0;
Iterator iter = new DatasetIterator();
while (iter.hasNext())
while (iter.hasNext()) {
count++;
}
return count;
}
}

private class DatasetIterator implements Iterator<Map.Entry<String, String>> {
private Iterator<Attribute> attrIter = attributes.values().iterator();
private class DatasetIterator implements
Iterator<Map.Entry<String, String>> {
private Iterator<Attribute> attrIter = attributes.values()
.iterator();
private Attribute attr;

@Override
public boolean hasNext() {
while (attrIter.hasNext()) {
attr = attrIter.next();
if (attr.isDataAttribute()) return true;
if (attr.isDataAttribute()) {
return true;
}
}
return false;
}

@Override
public Entry<String, String> next() {
return new Attribute(attr.getKey().substring(dataPrefix.length()), attr.getValue());
return new Attribute(attr.getKey().substring(
dataPrefix.length()), attr.getValue());
}

@Override
public void remove() {
attributes.remove(attr.getKey());
}

+ 25
- 15
server/src/org/jsoup/nodes/Comment.java View File

@@ -1,45 +1,55 @@
package org.jsoup.nodes;

/**
A comment node.

@author Jonathan Hedley, jonathan@hedley.net */
* A comment node.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Comment extends Node {
private static final String COMMENT_KEY = "comment";

/**
Create a new comment node.
@param data The contents of the comment
@param baseUri base URI
* Create a new comment node.
*
* @param data
* The contents of the comment
* @param baseUri
* base URI
*/
public Comment(String data, String baseUri) {
super(baseUri);
attributes.put(COMMENT_KEY, data);
}

@Override
public String nodeName() {
return "#comment";
}

/**
Get the contents of the comment.
@return comment content
* Get the contents of the comment.
*
* @return comment content
*/
public String getData() {
return attributes.get(COMMENT_KEY);
}

void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
if (out.prettyPrint())
@Override
void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out) {
if (out.prettyPrint()) {
indent(accum, depth, out);
accum
.append("<!--")
.append(getData())
.append("-->");
}
accum.append("<!--").append(getData()).append("-->");
}

void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
@Override
void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out) {
}

@Override
public String toString() {
return outerHtml();
}

+ 37
- 17
server/src/org/jsoup/nodes/DataNode.java View File

@@ -1,29 +1,37 @@
package org.jsoup.nodes;

/**
A data node, for contents of style, script tags etc, where contents should not show in text().

@author Jonathan Hedley, jonathan@hedley.net */
public class DataNode extends Node{
* A data node, for contents of style, script tags etc, where contents should
* not show in text().
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class DataNode extends Node {
private static final String DATA_KEY = "data";

/**
Create a new DataNode.
@param data data contents
@param baseUri base URI
* Create a new DataNode.
*
* @param data
* data contents
* @param baseUri
* base URI
*/
public DataNode(String data, String baseUri) {
super(baseUri);
attributes.put(DATA_KEY, data);
}

@Override
public String nodeName() {
return "#data";
}

/**
Get the data contents of this node. Will be unescaped and with original new lines, space etc.
@return data
* Get the data contents of this node. Will be unescaped and with original
* new lines, space etc.
*
* @return data
*/
public String getWholeData() {
return attributes.get(DATA_KEY);
@@ -31,7 +39,9 @@ public class DataNode extends Node{

/**
* Set the data contents of this node.
* @param data unencoded data
*
* @param data
* unencoded data
* @return this node, for chaining
*/
public DataNode setWholeData(String data) {
@@ -39,21 +49,31 @@ public class DataNode extends Node{
return this;
}

void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
accum.append(getWholeData()); // data is not escaped in return from data nodes, so " in script, style is plain
@Override
void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out) {
accum.append(getWholeData()); // data is not escaped in return from data
// nodes, so " in script, style is plain
}

void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
@Override
void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out) {
}

@Override
public String toString() {
return outerHtml();
}

/**
Create a new DataNode from HTML encoded data.
@param encodedData encoded data
@param baseUri base URI
@return new DataNode
* Create a new DataNode from HTML encoded data.
*
* @param encodedData
* encoded data
* @param baseUri
* base URI
* @return new DataNode
*/
public static DataNode createFromEncoded(String encodedData, String baseUri) {
String data = Entities.unescape(encodedData);

+ 122
- 70
server/src/org/jsoup/nodes/Document.java View File

@@ -1,36 +1,42 @@
package org.jsoup.nodes;

import org.jsoup.helper.Validate;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;

import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.ArrayList;
import java.util.List;

/**
A HTML Document.
import org.jsoup.helper.Validate;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;

@author Jonathan Hedley, jonathan@hedley.net */
/**
* A HTML Document.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Document extends Element {
private OutputSettings outputSettings = new OutputSettings();
private QuirksMode quirksMode = QuirksMode.noQuirks;

/**
Create a new, empty Document.
@param baseUri base URI of document
@see org.jsoup.Jsoup#parse
@see #createShell
* Create a new, empty Document.
*
* @param baseUri
* base URI of document
* @see org.jsoup.Jsoup#parse
* @see #createShell
*/
public Document(String baseUri) {
super(Tag.valueOf("#root"), baseUri);
}

/**
Create a valid, empty shell of a document, suitable for adding more elements to.
@param baseUri baseUri of document
@return document with html, head, and body elements.
* Create a valid, empty shell of a document, suitable for adding more
* elements to.
*
* @param baseUri
* baseUri of document
* @return document with html, head, and body elements.
*/
static public Document createShell(String baseUri) {
Validate.notNull(baseUri);
@@ -44,24 +50,27 @@ public class Document extends Element {
}

/**
Accessor to the document's {@code head} element.
@return {@code head}
* Accessor to the document's {@code head} element.
*
* @return {@code head}
*/
public Element head() {
return findFirstElementByTagName("head", this);
}

/**
Accessor to the document's {@code body} element.
@return {@code body}
* Accessor to the document's {@code body} element.
*
* @return {@code body}
*/
public Element body() {
return findFirstElementByTagName("body", this);
}

/**
Get the string contents of the document's {@code title} element.
@return Trimmed title, or empty string if none set.
* Get the string contents of the document's {@code title} element.
*
* @return Trimmed title, or empty string if none set.
*/
public String title() {
Element titleEl = getElementsByTag("title").first();
@@ -69,9 +78,11 @@ public class Document extends Element {
}

/**
Set the document's {@code title} element. Updates the existing element, or adds {@code title} to {@code head} if
not present
@param title string to set as title
* Set the document's {@code title} element. Updates the existing element,
* or adds {@code title} to {@code head} if not present
*
* @param title
* string to set as title
*/
public void title(String title) {
Validate.notNull(title);
@@ -84,29 +95,38 @@ public class Document extends Element {
}

/**
Create a new Element, with this document's base uri. Does not make the new element a child of this document.
@param tagName element tag name (e.g. {@code a})
@return new element
* Create a new Element, with this document's base uri. Does not make the
* new element a child of this document.
*
* @param tagName
* element tag name (e.g. {@code a})
* @return new element
*/
public Element createElement(String tagName) {
return new Element(Tag.valueOf(tagName), this.baseUri());
return new Element(Tag.valueOf(tagName), baseUri());
}

/**
Normalise the document. This happens after the parse phase so generally does not need to be called.
Moves any text content that is not in the body element into the body.
@return this document after normalisation
* Normalise the document. This happens after the parse phase so generally
* does not need to be called. Moves any text content that is not in the
* body element into the body.
*
* @return this document after normalisation
*/
public Document normalise() {
Element htmlEl = findFirstElementByTagName("html", this);
if (htmlEl == null)
if (htmlEl == null) {
htmlEl = appendElement("html");
if (head() == null)
}
if (head() == null) {
htmlEl.prependElement("head");
if (body() == null)
}
if (body() == null) {
htmlEl.appendElement("body");
}

// pull text nodes out of root, html, and head els, and push into body. non-text nodes are already taken care
// pull text nodes out of root, html, and head els, and push into body.
// non-text nodes are already taken care
// of. do in inverse order to maintain text order.
normaliseTextNodes(head());
normaliseTextNodes(htmlEl);
@@ -114,22 +134,23 @@ public class Document extends Element {

normaliseStructure("head", htmlEl);
normaliseStructure("body", htmlEl);
return this;
}

// does not recurse.
private void normaliseTextNodes(Element element) {
List<Node> toMove = new ArrayList<Node>();
for (Node node: element.childNodes) {
for (Node node : element.childNodes) {
if (node instanceof TextNode) {
TextNode tn = (TextNode) node;
if (!tn.isBlank())
if (!tn.isBlank()) {
toMove.add(tn);
}
}
}

for (int i = toMove.size()-1; i >= 0; i--) {
for (int i = toMove.size() - 1; i >= 0; i--) {
Node node = toMove.get(i);
element.removeChild(node);
body().prependChild(new TextNode(" ", ""));
@@ -137,37 +158,42 @@ public class Document extends Element {
}
}

// merge multiple <head> or <body> contents into one, delete the remainder, and ensure they are owned by <html>
// merge multiple <head> or <body> contents into one, delete the remainder,
// and ensure they are owned by <html>
private void normaliseStructure(String tag, Element htmlEl) {
Elements elements = this.getElementsByTag(tag);
Element master = elements.first(); // will always be available as created above if not existent
Elements elements = getElementsByTag(tag);
Element master = elements.first(); // will always be available as
// created above if not existent
if (elements.size() > 1) { // dupes, move contents to master
List<Node> toMove = new ArrayList<Node>();
for (int i = 1; i < elements.size(); i++) {
Node dupe = elements.get(i);
for (Node node : dupe.childNodes)
for (Node node : dupe.childNodes) {
toMove.add(node);
}
dupe.remove();
}

for (Node dupe : toMove)
for (Node dupe : toMove) {
master.appendChild(dupe);
}
}
// ensure parented by <html>
if (!master.parent().equals(htmlEl)) {
htmlEl.appendChild(master); // includes remove()
htmlEl.appendChild(master); // includes remove()
}
}

// fast method to get first by tag name, used for html, head, body finders
private Element findFirstElementByTagName(String tag, Node node) {
if (node.nodeName().equals(tag))
if (node.nodeName().equals(tag)) {
return (Element) node;
else {
for (Node child: node.childNodes) {
} else {
for (Node child : node.childNodes) {
Element found = findFirstElementByTagName(tag, child);
if (found != null)
if (found != null) {
return found;
}
}
}
return null;
@@ -179,9 +205,12 @@ public class Document extends Element {
}

/**
Set the text of the {@code body} of this document. Any existing nodes within the body will be cleared.
@param text unencoded text
@return this document
* Set the text of the {@code body} of this document. Any existing nodes
* within the body will be cleared.
*
* @param text
* unencoded text
* @return this document
*/
@Override
public Element text(String text) {
@@ -197,12 +226,13 @@ public class Document extends Element {
@Override
public Document clone() {
Document clone = (Document) super.clone();
clone.outputSettings = this.outputSettings.clone();
clone.outputSettings = outputSettings.clone();
return clone;
}

/**
* A Document's output settings control the form of the text() and html() methods.
* A Document's output settings control the form of the text() and html()
* methods.
*/
public static class OutputSettings implements Cloneable {
private Entities.EscapeMode escapeMode = Entities.EscapeMode.base;
@@ -211,14 +241,18 @@ public class Document extends Element {
private boolean prettyPrint = true;
private int indentAmount = 1;

public OutputSettings() {}
public OutputSettings() {
}

/**
* Get the document's current HTML escape mode: <code>base</code>, which provides a limited set of named HTML
* entities and escapes other characters as numbered entities for maximum compatibility; or <code>extended</code>,
* which uses the complete set of HTML named entities.
* Get the document's current HTML escape mode: <code>base</code>, which
* provides a limited set of named HTML entities and escapes other
* characters as numbered entities for maximum compatibility; or
* <code>extended</code>, which uses the complete set of HTML named
* entities.
* <p>
* The default escape mode is <code>base</code>.
*
* @return the document's current escape mode
*/
public Entities.EscapeMode escapeMode() {
@@ -227,7 +261,9 @@ public class Document extends Element {

/**
* Set the document's escape mode
* @param escapeMode the new escape mode to use
*
* @param escapeMode
* the new escape mode to use
* @return the document's output settings, for chaining
*/
public OutputSettings escapeMode(Entities.EscapeMode escapeMode) {
@@ -236,11 +272,14 @@ public class Document extends Element {
}

/**
* Get the document's current output charset, which is used to control which characters are escaped when
* generating HTML (via the <code>html()</code> methods), and which are kept intact.
* Get the document's current output charset, which is used to control
* which characters are escaped when generating HTML (via the
* <code>html()</code> methods), and which are kept intact.
* <p>
* Where possible (when parsing from a URL or File), the document's output charset is automatically set to the
* input charset. Otherwise, it defaults to UTF-8.
* Where possible (when parsing from a URL or File), the document's
* output charset is automatically set to the input charset. Otherwise,
* it defaults to UTF-8.
*
* @return the document's current charset.
*/
public Charset charset() {
@@ -249,7 +288,9 @@ public class Document extends Element {

/**
* Update the document's output charset.
* @param charset the new charset to use.
*
* @param charset
* the new charset to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(Charset charset) {
@@ -261,7 +302,9 @@ public class Document extends Element {

/**
* Update the document's output charset.
* @param charset the new charset (by name) to use.
*
* @param charset
* the new charset (by name) to use.
* @return the document's output settings, for chaining
*/
public OutputSettings charset(String charset) {
@@ -274,8 +317,10 @@ public class Document extends Element {
}

/**
* Get if pretty printing is enabled. Default is true. If disabled, the HTML output methods will not re-format
* the output, and the output will generally look like the input.
* Get if pretty printing is enabled. Default is true. If disabled, the
* HTML output methods will not re-format the output, and the output
* will generally look like the input.
*
* @return if pretty printing is enabled.
*/
public boolean prettyPrint() {
@@ -284,7 +329,9 @@ public class Document extends Element {

/**
* Enable or disable pretty printing.
* @param pretty new pretty print setting
*
* @param pretty
* new pretty print setting
* @return this, for chaining
*/
public OutputSettings prettyPrint(boolean pretty) {
@@ -294,6 +341,7 @@ public class Document extends Element {

/**
* Get the current tag indent amount, used when pretty printing.
*
* @return the current indent amount
*/
public int indentAmount() {
@@ -302,7 +350,10 @@ public class Document extends Element {

/**
* Set the indent amount for pretty printing
* @param indentAmount number of spaces to use for indenting each level. Must be >= 0.
*
* @param indentAmount
* number of spaces to use for indenting each level. Must be
* >= 0.
* @return this, for chaining
*/
public OutputSettings indentAmount(int indentAmount) {
@@ -321,13 +372,15 @@ public class Document extends Element {
}
clone.charset(charset.name()); // new charset and charset encoder
clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
// indentAmount, prettyPrint are primitives so object.clone() will handle
// indentAmount, prettyPrint are primitives so object.clone() will
// handle
return clone;
}
}

/**
* Get the document's current output settings.
*
* @return the document's current output settings.
*/
public OutputSettings outputSettings() {
@@ -347,4 +400,3 @@ public class Document extends Element {
return this;
}
}


+ 19
- 9
server/src/org/jsoup/nodes/DocumentType.java View File

@@ -11,12 +11,18 @@ public class DocumentType extends Node {

/**
* Create a new doctype element.
* @param name the doctype's name
* @param publicId the doctype's public ID
* @param systemId the doctype's system ID
* @param baseUri the doctype's base URI
*
* @param name
* the doctype's name
* @param publicId
* the doctype's public ID
* @param systemId
* the doctype's system ID
* @param baseUri
* the doctype's base URI
*/
public DocumentType(String name, String publicId, String systemId, String baseUri) {
public DocumentType(String name, String publicId, String systemId,
String baseUri) {
super(baseUri);

Validate.notEmpty(name);
@@ -31,16 +37,20 @@ public class DocumentType extends Node {
}

@Override
void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out) {
accum.append("<!DOCTYPE ").append(attr("name"));
if (!StringUtil.isBlank(attr("publicId")))
if (!StringUtil.isBlank(attr("publicId"))) {
accum.append(" PUBLIC \"").append(attr("publicId")).append("\"");
if (!StringUtil.isBlank(attr("systemId")))
}
if (!StringUtil.isBlank(attr("systemId"))) {
accum.append(" \"").append(attr("systemId")).append("\"");
}
accum.append('>');
}

@Override
void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {
void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out) {
}
}

+ 513
- 277
server/src/org/jsoup/nodes/Element.java
File diff suppressed because it is too large
View File


+ 72
- 39
server/src/org/jsoup/nodes/Entities.java View File

@@ -3,18 +3,24 @@ package org.jsoup.nodes;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.CharsetEncoder;
import java.util.*;
import java.util.HashMap;
import java.util.Map;
import java.util.MissingResourceException;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* HTML entities, and escape routines.
* Source: <a href="http://www.w3.org/TR/html5/named-character-references.html#named-character-references">W3C HTML
* named character references</a>.
* HTML entities, and escape routines. Source: <a href=
* "http://www.w3.org/TR/html5/named-character-references.html#named-character-references"
* >W3C HTML named character references</a>.
*/
public class Entities {
public enum EscapeMode {
/** Restricted entities suitable for XHTML output: lt, gt, amp, apos, and quot only. */
/**
* Restricted entities suitable for XHTML output: lt, gt, amp, apos, and
* quot only.
*/
xhtml(xhtmlByVal),
/** Default HTML output entities. */
base(baseByVal),
@@ -36,21 +42,26 @@ public class Entities {
private static final Map<Character, String> xhtmlByVal;
private static final Map<Character, String> baseByVal;
private static final Map<Character, String> fullByVal;
private static final Pattern unescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");
private static final Pattern unescapePattern = Pattern
.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);?");
private static final Pattern strictUnescapePattern = Pattern
.compile("&(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]+\\d*);");

private Entities() {}
private Entities() {
}

/**
* Check if the input is a known named entity
* @param name the possible entity name (e.g. "lt" or "amp")
*
* @param name
* the possible entity name (e.g. "lt" or "amp")
* @return true if a known named entity
*/
public static boolean isNamedEntity(String name) {
return full.containsKey(name);
}

/**
/**
* Get the Character value of the named entity
* @param name named entity (e.g. "lt" or "amp")
* @return the Character value of the named entity (e.g. '<' or '&')
@@ -58,23 +69,25 @@ public class Entities {
public static Character getCharacterByName(String name) {
return full.get(name);
}
static String escape(String string, Document.OutputSettings out) {
return escape(string, out.encoder(), out.escapeMode());
}

static String escape(String string, CharsetEncoder encoder, EscapeMode escapeMode) {
static String escape(String string, CharsetEncoder encoder,
EscapeMode escapeMode) {
StringBuilder accum = new StringBuilder(string.length() * 2);
Map<Character, String> map = escapeMode.getMap();

for (int pos = 0; pos < string.length(); pos++) {
Character c = string.charAt(pos);
if (map.containsKey(c))
if (map.containsKey(c)) {
accum.append('&').append(map.get(c)).append(';');
else if (encoder.canEncode(c))
} else if (encoder.canEncode(c)) {
accum.append(c.charValue());
else
} else {
accum.append("&#").append((int) c).append(';');
}
}

return accum.toString();
@@ -86,39 +99,53 @@ public class Entities {

/**
* Unescape the input string.
*
* @param string
* @param strict if "strict" (that is, requires trailing ';' char, otherwise that's optional)
* @param strict
* if "strict" (that is, requires trailing ';' char, otherwise
* that's optional)
* @return
*/
static String unescape(String string, boolean strict) {
// todo: change this method to use Tokeniser.consumeCharacterReference
if (!string.contains("&"))
if (!string.contains("&")) {
return string;
}

Matcher m = strict? strictUnescapePattern.matcher(string) : unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);?
StringBuffer accum = new StringBuffer(string.length()); // pity matcher can't use stringbuilder, avoid syncs
// todo: replace m.appendReplacement with own impl, so StringBuilder and quoteReplacement not required
Matcher m = strict ? strictUnescapePattern.matcher(string)
: unescapePattern.matcher(string); // &(#(x|X)?([0-9a-fA-F]+)|[a-zA-Z]\\d*);?
StringBuffer accum = new StringBuffer(string.length()); // pity matcher
// can't use
// stringbuilder,
// avoid syncs
// todo: replace m.appendReplacement with own impl, so StringBuilder and
// quoteReplacement not required

while (m.find()) {
int charval = -1;
String num = m.group(3);
if (num != null) {
try {
int base = m.group(2) != null ? 16 : 10; // 2 is hex indicator
int base = m.group(2) != null ? 16 : 10; // 2 is hex
// indicator
charval = Integer.valueOf(num, base);
} catch (NumberFormatException e) {
} // skip
} else {
String name = m.group(1);
if (full.containsKey(name))
if (full.containsKey(name)) {
charval = full.get(name);
}
}

if (charval != -1 || charval > 0xFFFF) { // out of range
String c = Character.toString((char) charval);
m.appendReplacement(accum, Matcher.quoteReplacement(c));
} else {
m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace with original string
m.appendReplacement(accum, Matcher.quoteReplacement(m.group(0))); // replace
// with
// original
// string
}
}
m.appendTail(accum);
@@ -126,22 +153,23 @@ public class Entities {
}

// xhtml has restricted entities
private static final Object[][] xhtmlArray = {
{"quot", 0x00022},
{"amp", 0x00026},
{"apos", 0x00027},
{"lt", 0x0003C},
{"gt", 0x0003E}
};
private static final Object[][] xhtmlArray = { { "quot", 0x00022 },
{ "amp", 0x00026 }, { "apos", 0x00027 }, { "lt", 0x0003C },
{ "gt", 0x0003E } };

static {
xhtmlByVal = new HashMap<Character, String>();
baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most common / default
full = loadEntities("entities-full.properties"); // extended and overblown.
baseByVal = toCharacterKey(loadEntities("entities-base.properties")); // most
// common
// /
// default
full = loadEntities("entities-full.properties"); // extended and
// overblown.
fullByVal = toCharacterKey(full);

for (Object[] entity : xhtmlArray) {
Character c = Character.valueOf((char) ((Integer) entity[1]).intValue());
Character c = Character.valueOf((char) ((Integer) entity[1])
.intValue());
xhtmlByVal.put(c, ((String) entity[0]));
}
}
@@ -154,27 +182,32 @@ public class Entities {
properties.load(in);
in.close();
} catch (IOException e) {
throw new MissingResourceException("Error loading entities resource: " + e.getMessage(), "Entities", filename);
throw new MissingResourceException(
"Error loading entities resource: " + e.getMessage(),
"Entities", filename);
}

for (Map.Entry entry: properties.entrySet()) {
Character val = Character.valueOf((char) Integer.parseInt((String) entry.getValue(), 16));
for (Map.Entry entry : properties.entrySet()) {
Character val = Character.valueOf((char) Integer.parseInt(
(String) entry.getValue(), 16));
String name = (String) entry.getKey();
entities.put(name, val);
}
return entities;
}

private static Map<Character, String> toCharacterKey(Map<String, Character> inMap) {
private static Map<Character, String> toCharacterKey(
Map<String, Character> inMap) {
Map<Character, String> outMap = new HashMap<Character, String>();
for (Map.Entry<String, Character> entry: inMap.entrySet()) {
for (Map.Entry<String, Character> entry : inMap.entrySet()) {
Character character = entry.getValue();
String name = entry.getKey();

if (outMap.containsKey(character)) {
// dupe, prefer the lower case version
if (name.toLowerCase().equals(name))
if (name.toLowerCase().equals(name)) {
outMap.put(character, name);
}
} else {
outMap.put(character, name);
}

+ 257
- 145
server/src/org/jsoup/nodes/Node.java View File

@@ -1,21 +1,23 @@
package org.jsoup.nodes;

import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
The base, abstract Node model. Elements, Documents, Comments etc are all Node instances.
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Parser;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;

@author Jonathan Hedley, jonathan@hedley.net */
/**
* The base, abstract Node model. Elements, Documents, Comments etc are all Node
* instances.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public abstract class Node implements Cloneable {
Node parentNode;
List<Node> childNodes;
@@ -24,14 +26,17 @@ public abstract class Node implements Cloneable {
int siblingIndex;

/**
Create a new Node.
@param baseUri base URI
@param attributes attributes (not null, but may be empty)
* Create a new Node.
*
* @param baseUri
* base URI
* @param attributes
* attributes (not null, but may be empty)
*/
protected Node(String baseUri, Attributes attributes) {
Validate.notNull(baseUri);
Validate.notNull(attributes);
childNodes = new ArrayList<Node>(4);
this.baseUri = baseUri.trim();
this.attributes = attributes;
@@ -42,7 +47,8 @@ public abstract class Node implements Cloneable {
}

/**
* Default constructor. Doesn't setup base uri, children, or attributes; use with caution.
* Default constructor. Doesn't setup base uri, children, or attributes; use
* with caution.
*/
protected Node() {
childNodes = Collections.emptyList();
@@ -50,18 +56,23 @@ public abstract class Node implements Cloneable {
}

/**
Get the node name of this node. Use for debugging purposes and not logic switching (for that, use instanceof).
@return node name
* Get the node name of this node. Use for debugging purposes and not logic
* switching (for that, use instanceof).
*
* @return node name
*/
public abstract String nodeName();

/**
* Get an attribute's value by its key.
* <p/>
* To get an absolute URL from an attribute that may be a relative URL, prefix the key with <code><b>abs</b></code>,
* which is a shortcut to the {@link #absUrl} method.
* E.g.: <blockquote><code>String url = a.attr("abs:href");</code></blockquote>
* @param attributeKey The attribute key.
* To get an absolute URL from an attribute that may be a relative URL,
* prefix the key with <code><b>abs</b></code>, which is a shortcut to the
* {@link #absUrl} method. E.g.: <blockquote>
* <code>String url = a.attr("abs:href");</code></blockquote>
*
* @param attributeKey
* The attribute key.
* @return The attribute, or empty string if not present (to avoid nulls).
* @see #attributes()
* @see #hasAttr(String)
@@ -70,25 +81,33 @@ public abstract class Node implements Cloneable {
public String attr(String attributeKey) {
Validate.notNull(attributeKey);

if (attributes.hasKey(attributeKey))
if (attributes.hasKey(attributeKey)) {
return attributes.get(attributeKey);
else if (attributeKey.toLowerCase().startsWith("abs:"))
} else if (attributeKey.toLowerCase().startsWith("abs:")) {
return absUrl(attributeKey.substring("abs:".length()));
else return "";
} else {
return "";
}
}

/**
* Get all of the element's attributes.
* @return attributes (which implements iterable, in same order as presented in original HTML).
*
* @return attributes (which implements iterable, in same order as presented
* in original HTML).
*/
public Attributes attributes() {
return attributes;
}

/**
* Set an attribute (key=value). If the attribute already exists, it is replaced.
* @param attributeKey The attribute key.
* @param attributeValue The attribute value.
* Set an attribute (key=value). If the attribute already exists, it is
* replaced.
*
* @param attributeKey
* The attribute key.
* @param attributeValue
* The attribute value.
* @return this (for chaining)
*/
public Node attr(String attributeKey, String attributeValue) {
@@ -98,7 +117,9 @@ public abstract class Node implements Cloneable {

/**
* Test if this element has an attribute.
* @param attributeKey The attribute key to check.
*
* @param attributeKey
* The attribute key to check.
* @return true if the attribute exists, false if not.
*/
public boolean hasAttr(String attributeKey) {
@@ -106,15 +127,18 @@ public abstract class Node implements Cloneable {

if (attributeKey.toLowerCase().startsWith("abs:")) {
String key = attributeKey.substring("abs:".length());
if (attributes.hasKey(key) && !absUrl(key).equals(""))
if (attributes.hasKey(key) && !absUrl(key).equals("")) {
return true;
}
}
return attributes.hasKey(attributeKey);
}

/**
* Remove an attribute from this element.
* @param attributeKey The attribute to remove.
*
* @param attributeKey
* The attribute to remove.
* @return this (for chaining)
*/
public Node removeAttr(String attributeKey) {
@@ -124,47 +148,56 @@ public abstract class Node implements Cloneable {
}

/**
Get the base URI of this node.
@return base URI
* Get the base URI of this node.
*
* @return base URI
*/
public String baseUri() {
return baseUri;
}

/**
Update the base URI of this node and all of its descendants.
@param baseUri base URI to set
* Update the base URI of this node and all of its descendants.
*
* @param baseUri
* base URI to set
*/
public void setBaseUri(final String baseUri) {
    Validate.notNull(baseUri);

    // Visitor that stamps the new base URI on each node as it is entered;
    // nothing needs to happen when a node is left.
    final NodeVisitor baseUriSetter = new NodeVisitor() {
        @Override
        public void head(Node visited, int depth) {
            visited.baseUri = baseUri;
        }

        @Override
        public void tail(Node visited, int depth) {
        }
    };

    traverse(baseUriSetter);
}

/**
* Get an absolute URL from a URL attribute that may be relative (i.e. an <code>&lt;a href></code> or
* <code>&lt;img src></code>).
* Get an absolute URL from a URL attribute that may be relative (i.e. an
* <code>&lt;a href></code> or <code>&lt;img src></code>).
* <p/>
* E.g.: <code>String absUrl = linkEl.absUrl("href");</code>
* <p/>
* If the attribute value is already absolute (i.e. it starts with a protocol, like
* <code>http://</code> or <code>https://</code> etc), and it successfully parses as a URL, the attribute is
* returned directly. Otherwise, it is treated as a URL relative to the element's {@link #baseUri}, and made
* absolute using that.
* If the attribute value is already absolute (i.e. it starts with a
* protocol, like <code>http://</code> or <code>https://</code> etc), and it
* successfully parses as a URL, the attribute is returned directly.
* Otherwise, it is treated as a URL relative to the element's
* {@link #baseUri}, and made absolute using that.
* <p/>
* As an alternate, you can use the {@link #attr} method with the <code>abs:</code> prefix, e.g.:
* As an alternate, you can use the {@link #attr} method with the
* <code>abs:</code> prefix, e.g.:
* <code>String absUrl = linkEl.attr("abs:href");</code>
*
* @param attributeKey The attribute key
* @return An absolute URL if one could be made, or an empty string (not null) if the attribute was missing or
* could not be made successfully into a URL.
*
* @param attributeKey
* The attribute key
* @return An absolute URL if one could be made, or an empty string (not
* null) if the attribute was missing or could not be made
* successfully into a URL.
* @see #attr
* @see java.net.URL#URL(java.net.URL, String)
*/
@@ -180,13 +213,16 @@ public abstract class Node implements Cloneable {
try {
base = new URL(baseUri);
} catch (MalformedURLException e) {
// the base is unsuitable, but the attribute may be abs on its own, so try that
// the base is unsuitable, but the attribute may be abs on
// its own, so try that
URL abs = new URL(relUrl);
return abs.toExternalForm();
}
// workaround: java resolves '//path/file + ?foo' to '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?"))
// workaround: java resolves '//path/file + ?foo' to
// '//path/?foo', not '//path/file?foo' as desired
if (relUrl.startsWith("?")) {
relUrl = base.getPath() + relUrl;
}
URL abs = new URL(base, relUrl);
return abs.toExternalForm();
} catch (MalformedURLException e) {
@@ -196,50 +232,58 @@ public abstract class Node implements Cloneable {
}

/**
Get a child node by index
@param index index of child node
@return the child node at this index.
* Get a child node by index
*
* @param index
* index of child node
* @return the child node at this index.
*/
public Node childNode(int index) {
return childNodes.get(index);
}

/**
Get this node's children. Presented as an unmodifiable list: new children can not be added, but the child nodes
themselves can be manipulated.
@return list of children. If no children, returns an empty list.
* Get this node's children. Presented as an unmodifiable list: new children
* can not be added, but the child nodes themselves can be manipulated.
*
* @return list of children. If no children, returns an empty list.
*/
public List<Node> childNodes() {
return Collections.unmodifiableList(childNodes);
}
/**
 * Copies this node's children into a new array.
 *
 * @return a newly allocated array holding the child nodes in list order;
 *         an empty array if there are no children
 */
protected Node[] childNodesAsArray() {
    // Size the array from the list that is actually being copied. Going
    // through childNodes() would wrap the list in an unmodifiable view
    // solely to read its size.
    return childNodes.toArray(new Node[childNodes.size()]);
}

/**
Gets this node's parent node.
@return parent node; or null if no parent.
* Gets this node's parent node.
*
* @return parent node; or null if no parent.
*/
public Node parent() {
return parentNode;
}
/**
* Gets the Document associated with this Node.
* @return the Document associated with this Node, or null if there is no such Document.
* Gets the Document associated with this Node.
*
* @return the Document associated with this Node, or null if there is no
* such Document.
*/
public Document ownerDocument() {
if (this instanceof Document)
if (this instanceof Document) {
return (Document) this;
else if (parentNode == null)
} else if (parentNode == null) {
return null;
else
} else {
return parentNode.ownerDocument();
}
}
/**
* Remove (delete) this node from the DOM tree. If this node has children, they are also removed.
* Remove (delete) this node from the DOM tree. If this node has children,
* they are also removed.
*/
public void remove() {
Validate.notNull(parentNode);
@@ -247,8 +291,11 @@ public abstract class Node implements Cloneable {
}

/**
* Insert the specified HTML into the DOM before this node (i.e. as a preceding sibling).
* @param html HTML to add before this node
* Insert the specified HTML into the DOM before this node (i.e. as a
* preceding sibling).
*
* @param html
* HTML to add before this node
* @return this node, for chaining
* @see #after(String)
*/
@@ -258,8 +305,11 @@ public abstract class Node implements Cloneable {
}

/**
* Insert the specified node into the DOM before this node (i.e. as a preceding sibling).
* @param node to add before this node
* Insert the specified node into the DOM before this node (i.e. as a
* preceding sibling).
*
* @param node
* to add before this node
* @return this node, for chaining
* @see #after(Node)
*/
@@ -272,19 +322,25 @@ public abstract class Node implements Cloneable {
}

/**
* Insert the specified HTML into the DOM after this node (i.e. as a following sibling).
* @param html HTML to add after this node
* Insert the specified HTML into the DOM after this node (i.e. as a
* following sibling).
*
* @param html
* HTML to add after this node
* @return this node, for chaining
* @see #before(String)
*/
public Node after(String html) {
addSiblingHtml(siblingIndex()+1, html);
addSiblingHtml(siblingIndex() + 1, html);
return this;
}

/**
* Insert the specified node into the DOM after this node (i.e. as a following sibling).
* @param node to add after this node
* Insert the specified node into the DOM after this node (i.e. as a
* following sibling).
*
* @param node
* to add after this node
* @return this node, for chaining
* @see #before(Node)
*/
@@ -292,7 +348,7 @@ public abstract class Node implements Cloneable {
Validate.notNull(node);
Validate.notNull(parentNode);

parentNode.addChildren(siblingIndex()+1, node);
parentNode.addChildren(siblingIndex() + 1, node);
return this;
}

@@ -300,31 +356,39 @@ public abstract class Node implements Cloneable {
Validate.notNull(html);
Validate.notNull(parentNode);

Element context = parent() instanceof Element ? (Element) parent() : null;
Element context = parent() instanceof Element ? (Element) parent()
: null;
List<Node> nodes = Parser.parseFragment(html, context, baseUri());
parentNode.addChildren(index, nodes.toArray(new Node[nodes.size()]));
}

/**
Wrap the supplied HTML around this node.
@param html HTML to wrap around this element, e.g. {@code <div class="head"></div>}. Can be arbitrarily deep.
@return this node, for chaining.
* Wrap the supplied HTML around this node.
*
* @param html
* HTML to wrap around this element, e.g.
* {@code <div class="head"></div>}. Can be arbitrarily deep.
* @return this node, for chaining.
*/
public Node wrap(String html) {
Validate.notEmpty(html);

Element context = parent() instanceof Element ? (Element) parent() : null;
List<Node> wrapChildren = Parser.parseFragment(html, context, baseUri());
Element context = parent() instanceof Element ? (Element) parent()
: null;
List<Node> wrapChildren = Parser
.parseFragment(html, context, baseUri());
Node wrapNode = wrapChildren.get(0);
if (wrapNode == null || !(wrapNode instanceof Element)) // nothing to wrap with; noop
if (wrapNode == null || !(wrapNode instanceof Element)) {
return null;
}

Element wrap = (Element) wrapNode;
Element deepest = getDeepChild(wrap);
parentNode.replaceChild(this, wrap);
deepest.addChildren(this);

// remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is remainder
// remainder (unbalanced wrap, like <div></div><p></p> -- The <p> is
// remainder
if (wrapChildren.size() > 0) {
for (int i = 0; i < wrapChildren.size(); i++) {
Node remainder = wrapChildren.get(i);
@@ -336,15 +400,19 @@ public abstract class Node implements Cloneable {
}

/**
* Removes this node from the DOM, and moves its children up into the node's parent. This has the effect of dropping
* the node but keeping its children.
* Removes this node from the DOM, and moves its children up into the node's
* parent. This has the effect of dropping the node but keeping its
* children.
* <p/>
* For example, with the input html:<br/>
* {@code <div>One <span>Two <b>Three</b></span></div>}<br/>
* Calling {@code element.unwrap()} on the {@code span} element will result in the html:<br/>
* Calling {@code element.unwrap()} on the {@code span} element will result
* in the html:<br/>
* {@code <div>One Two <b>Three</b></div>}<br/>
* and the {@code "Two "} {@link TextNode} being returned.
* @return the first child of this node, after the node has been unwrapped. Null if the node had no children.
*
* @return the first child of this node, after the node has been unwrapped.
* Null if the node had no children.
* @see #remove()
* @see #wrap(String)
*/
@@ -353,23 +421,26 @@ public abstract class Node implements Cloneable {

int index = siblingIndex;
Node firstChild = childNodes.size() > 0 ? childNodes.get(0) : null;
parentNode.addChildren(index, this.childNodesAsArray());
this.remove();
parentNode.addChildren(index, childNodesAsArray());
remove();

return firstChild;
}

private Element getDeepChild(Element el) {
List<Element> children = el.children();
if (children.size() > 0)
if (children.size() > 0) {
return getDeepChild(children.get(0));
else
} else {
return el;
}
}
/**
* Replace this node in the DOM with the supplied node.
* @param in the node that will replace the existing node.
*
* @param in
* the node that will replace the existing node.
*/
public void replaceWith(Node in) {
Validate.notNull(in);
@@ -378,17 +449,19 @@ public abstract class Node implements Cloneable {
}

protected void setParentNode(Node parentNode) {
if (this.parentNode != null)
if (this.parentNode != null) {
this.parentNode.removeChild(this);
}
this.parentNode = parentNode;
}

protected void replaceChild(Node out, Node in) {
Validate.isTrue(out.parentNode == this);
Validate.notNull(in);
if (in.parentNode != null)
if (in.parentNode != null) {
in.parentNode.removeChild(in);
}

Integer index = out.siblingIndex();
childNodes.set(index, in);
in.parentNode = this;
@@ -405,11 +478,12 @@ public abstract class Node implements Cloneable {
}

protected void addChildren(Node... children) {
//most used. short circuit addChildren(int), which hits reindex children and array copy
for (Node child: children) {
// most used. short circuit addChildren(int), which hits reindex
// children and array copy
for (Node child : children) {
reparentChild(child);
childNodes.add(child);
child.setSiblingIndex(childNodes.size()-1);
child.setSiblingIndex(childNodes.size() - 1);
}
}

@@ -424,85 +498,100 @@ public abstract class Node implements Cloneable {
}

private void reparentChild(Node child) {
if (child.parentNode != null)
if (child.parentNode != null) {
child.parentNode.removeChild(child);
}
child.setParentNode(this);
}
/**
 * Rewrites the sibling index of every child so that it equals the child's
 * current position in the child list.
 */
private void reindexChildren() {
    int position = 0;
    for (Node child : childNodes) {
        child.setSiblingIndex(position);
        position++;
    }
}
/**
Retrieves this node's sibling nodes. Similar to {@link #childNodes() node.parent.childNodes()}, but does not
include this node (a node is not a sibling of itself).
@return node siblings. If the node has no parent, returns an empty list.
* Retrieves this node's sibling nodes. Similar to {@link #childNodes()
* node.parent.childNodes()}, but does not include this node (a node is not
* a sibling of itself).
*
* @return node siblings. If the node has no parent, returns an empty list.
*/
public List<Node> siblingNodes() {
if (parentNode == null)
if (parentNode == null) {
return Collections.emptyList();
}

List<Node> nodes = parentNode.childNodes;
List<Node> siblings = new ArrayList<Node>(nodes.size() - 1);
for (Node node: nodes)
if (node != this)
for (Node node : nodes) {
if (node != this) {
siblings.add(node);
}
}
return siblings;
}

/**
Get this node's next sibling.
@return next sibling, or null if this is the last sibling
* Get this node's next sibling.
*
* @return next sibling, or null if this is the last sibling
*/
public Node nextSibling() {
if (parentNode == null)
if (parentNode == null) {
return null; // root
}

List<Node> siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
if (siblings.size() > index+1)
return siblings.get(index+1);
else
if (siblings.size() > index + 1) {
return siblings.get(index + 1);
} else {
return null;
}
}

/**
Get this node's previous sibling.
@return the previous sibling, or null if this is the first sibling
* Get this node's previous sibling.
*
* @return the previous sibling, or null if this is the first sibling
*/
public Node previousSibling() {
if (parentNode == null)
if (parentNode == null) {
return null; // root
}

List<Node> siblings = parentNode.childNodes;
Integer index = siblingIndex();
Validate.notNull(index);
if (index > 0)
return siblings.get(index-1);
else
if (index > 0) {
return siblings.get(index - 1);
} else {
return null;
}
}

/**
* Get the list index of this node in its node sibling list. I.e. if this is the first node
* sibling, returns 0.
* Get the list index of this node in its node sibling list. I.e. if this is
* the first node sibling, returns 0.
*
* @return position in node sibling list
* @see org.jsoup.nodes.Element#elementSiblingIndex()
*/
public int siblingIndex() {
return siblingIndex;
}
/**
* Store the list index of this node in its node sibling list. This only
* records the value; it does not move the node.
*
* @param siblingIndex
* position in node sibling list
*/
protected void setSiblingIndex(int siblingIndex) {
this.siblingIndex = siblingIndex;
}

/**
* Perform a depth-first traversal through this node and its descendants.
* @param nodeVisitor the visitor callbacks to perform on each node
*
* @param nodeVisitor
* the visitor callbacks to perform on each node
* @return this node, for chaining
*/
public Node traverse(NodeVisitor nodeVisitor) {
@@ -513,8 +602,9 @@ public abstract class Node implements Cloneable {
}

/**
Get the outer HTML of this node.
@return HTML
* Get the outer HTML of this node.
*
* @return HTML
*/
public String outerHtml() {
StringBuilder accum = new StringBuilder(128);
@@ -523,34 +613,47 @@ public abstract class Node implements Cloneable {
}

protected void outerHtml(StringBuilder accum) {
new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings())).traverse(this);
new NodeTraversor(new OuterHtmlVisitor(accum, getOutputSettings()))
.traverse(this);
}

// if this node has no document (or parent), retrieve the default output settings
// if this node has no document (or parent), retrieve the default output
// settings
private Document.OutputSettings getOutputSettings() {
return ownerDocument() != null ? ownerDocument().outputSettings() : (new Document("")).outputSettings();
return ownerDocument() != null ? ownerDocument().outputSettings()
: (new Document("")).outputSettings();
}

/**
Get the outer HTML of this node.
@param accum accumulator to place HTML into
* Get the outer HTML of this node.
*
* @param accum
* accumulator to place HTML into
*/
abstract void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out);
abstract void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out);

abstract void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out);
abstract void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out);

@Override
public String toString() {
return outerHtml();
}

protected void indent(StringBuilder accum, int depth, Document.OutputSettings out) {
accum.append("\n").append(StringUtil.padding(depth * out.indentAmount()));
protected void indent(StringBuilder accum, int depth,
Document.OutputSettings out) {
accum.append("\n").append(
StringUtil.padding(depth * out.indentAmount()));
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
// todo: have nodes hold a child index, compare against that and parent (not children)
if (this == o) {
return true;
}
// todo: have nodes hold a child index, compare against that and parent
// (not children)
return false;
}

@@ -563,11 +666,14 @@ public abstract class Node implements Cloneable {
}

/**
* Create a stand-alone, deep copy of this node, and all of its children. The cloned node will have no siblings or
* parent node. As a stand-alone object, any changes made to the clone or any of its children will not impact the
* original node.
* Create a stand-alone, deep copy of this node, and all of its children.
* The cloned node will have no siblings or parent node. As a stand-alone
* object, any changes made to the clone or any of its children will not
* impact the original node.
* <p>
* The cloned node may be adopted into another Document or node structure using {@link Element#appendChild(Node)}.
* The cloned node may be adopted into another Document or node structure
* using {@link Element#appendChild(Node)}.
*
* @return stand-alone cloned node
*/
@Override
@@ -588,8 +694,11 @@ public abstract class Node implements Cloneable {
clone.attributes = attributes != null ? attributes.clone() : null;
clone.baseUri = baseUri;
clone.childNodes = new ArrayList<Node>(childNodes.size());
for (Node child: childNodes)
clone.childNodes.add(child.doClone(clone)); // clone() creates orphans, doClone() keeps parent
for (Node child : childNodes) {
clone.childNodes.add(child.doClone(clone)); // clone() creates
// orphans, doClone()
// keeps parent
}

return clone;
}
@@ -603,13 +712,16 @@ public abstract class Node implements Cloneable {
this.out = out;
}

@Override
public void head(Node node, int depth) {
node.outerHtmlHead(accum, depth, out);
}

@Override
public void tail(Node node, int depth) {
if (!node.nodeName().equals("#text")) // saves a void hit.
if (!node.nodeName().equals("#text")) {
node.outerHtmlTail(accum, depth, out);
}
}
}
}

+ 62
- 31
server/src/org/jsoup/nodes/TextNode.java View File

@@ -4,111 +4,142 @@ import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;

/**
A text node.

@author Jonathan Hedley, jonathan@hedley.net */
* A text node.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class TextNode extends Node {
/*
TextNode is a node, and so by default comes with attributes and children. The attributes are seldom used, but use
memory, and the child nodes are never used. So we don't have them, and override accessors to attributes to create
them as needed on the fly.
* TextNode is a node, and so by default comes with attributes and children.
* The attributes are seldom used, but use memory, and the child nodes are
* never used. So we don't have them, and override accessors to attributes
* to create them as needed on the fly.
*/
private static final String TEXT_KEY = "text";
String text;

/**
Create a new TextNode representing the supplied (unencoded) text.

@param text raw text
@param baseUri base uri
@see #createFromEncoded(String, String)
* Create a new TextNode representing the supplied (unencoded) text.
*
* @param text
* raw text
* @param baseUri
* base uri
* @see #createFromEncoded(String, String)
*/
public TextNode(String text, String baseUri) {
this.baseUri = baseUri;
this.text = text;
}

@Override
public String nodeName() {
return "#text";
}
/**
* Get the text content of this text node.
*
* @return Unencoded, normalised text.
* @see TextNode#getWholeText()
*/
public String text() {
return normaliseWhitespace(getWholeText());
}
/**
* Set the text content of this text node.
* @param text unencoded text
*
* @param text
* unencoded text
* @return this, for chaining
*/
public TextNode text(String text) {
this.text = text;
if (attributes != null)
if (attributes != null) {
attributes.put(TEXT_KEY, text);
}
return this;
}

/**
Get the (unencoded) text of this text node, including any newlines and spaces present in the original.
@return text
* Get the (unencoded) text of this text node, including any newlines and
* spaces present in the original.
*
* @return text
*/
public String getWholeText() {
    // No attribute map yet: the text lives in the plain field.
    if (attributes == null) {
        return text;
    }
    // Otherwise read it from the attribute map's text entry.
    return attributes.get(TEXT_KEY);
}

/**
Test if this text node is blank -- that is, empty or only whitespace (including newlines).
@return true if this text node is empty or only whitespace, false if it contains any text content.
* Test if this text node is blank -- that is, empty or only whitespace
* (including newlines).
*
* @return true if this text node is empty or only whitespace, false if it
* contains any text content.
*/
public boolean isBlank() {
return StringUtil.isBlank(getWholeText());
}

/**
* Split this text node into two nodes at the specified string offset. After splitting, this node will contain the
* original text up to the offset, and will have a new text node sibling containing the text after the offset.
* @param offset string offset point to split node at.
* Split this text node into two nodes at the specified string offset. After
* splitting, this node will contain the original text up to the offset, and
* will have a new text node sibling containing the text after the offset.
*
* @param offset
* string offset point to split node at.
* @return the newly created text node containing the text after the offset.
*/
public TextNode splitText(int offset) {
Validate.isTrue(offset >= 0, "Split offset must be not be negative");
Validate.isTrue(offset < text.length(), "Split offset must not be greater than current text length");
Validate.isTrue(offset < text.length(),
"Split offset must not be greater than current text length");

String head = getWholeText().substring(0, offset);
String tail = getWholeText().substring(offset);
text(head);
TextNode tailNode = new TextNode(tail, this.baseUri());
if (parent() != null)
parent().addChildren(siblingIndex()+1, tailNode);
TextNode tailNode = new TextNode(tail, baseUri());
if (parent() != null) {
parent().addChildren(siblingIndex() + 1, tailNode);
}

return tailNode;
}

void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
@Override
void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out) {
String html = Entities.escape(getWholeText(), out);
if (out.prettyPrint() && parent() instanceof Element && !((Element) parent()).preserveWhitespace()) {
if (out.prettyPrint() && parent() instanceof Element
&& !((Element) parent()).preserveWhitespace()) {
html = normaliseWhitespace(html);
}

if (out.prettyPrint() && siblingIndex() == 0 && parentNode instanceof Element && ((Element) parentNode).tag().formatAsBlock() && !isBlank())
if (out.prettyPrint() && siblingIndex() == 0
&& parentNode instanceof Element
&& ((Element) parentNode).tag().formatAsBlock() && !isBlank()) {
indent(accum, depth, out);
}
accum.append(html);
}

void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
@Override
void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out) {
}

@Override
public String toString() {
return outerHtml();
}

/**
* Create a new TextNode from HTML encoded (aka escaped) data.
* @param encodedText Text containing encoded HTML (e.g. &amp;lt;)
*
* @param encodedText
* Text containing encoded HTML (e.g. &amp;lt;)
* @return TextNode containing unencoded data (e.g. &lt;)
*/
public static TextNode createFromEncoded(String encodedText, String baseUri) {

+ 31
- 18
server/src/org/jsoup/nodes/XmlDeclaration.java View File

@@ -1,47 +1,60 @@
package org.jsoup.nodes;

/**
An XML Declaration.

@author Jonathan Hedley, jonathan@hedley.net */
* An XML Declaration.
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class XmlDeclaration extends Node {
private static final String DECL_KEY = "declaration";
private final boolean isProcessingInstruction; // <! if true, <? if false, declaration (and last data char should be ?)
private final boolean isProcessingInstruction; // <! if true, <? if false,
// declaration (and last data
// char should be ?)

/**
Create a new XML declaration
@param data data
@param baseUri base uri
@param isProcessingInstruction is processing instruction
* Create a new XML declaration
*
* @param data
* data
* @param baseUri
* base uri
* @param isProcessingInstruction
* is processing instruction
*/
public XmlDeclaration(String data, String baseUri, boolean isProcessingInstruction) {
public XmlDeclaration(String data, String baseUri,
boolean isProcessingInstruction) {
super(baseUri);
attributes.put(DECL_KEY, data);
this.isProcessingInstruction = isProcessingInstruction;
}

@Override
public String nodeName() {
return "#declaration";
}

/**
Get the unencoded XML declaration.
@return XML declaration
* Get the unencoded XML declaration.
*
* @return XML declaration
*/
public String getWholeDeclaration() {
return attributes.get(DECL_KEY);
}

void outerHtmlHead(StringBuilder accum, int depth, Document.OutputSettings out) {
accum
.append("<")
.append(isProcessingInstruction ? "!" : "?")
.append(getWholeDeclaration())
.append(">");
@Override
void outerHtmlHead(StringBuilder accum, int depth,
Document.OutputSettings out) {
accum.append("<").append(isProcessingInstruction ? "!" : "?")
.append(getWholeDeclaration()).append(">");
}

void outerHtmlTail(StringBuilder accum, int depth, Document.OutputSettings out) {}
@Override
void outerHtmlTail(StringBuilder accum, int depth,
Document.OutputSettings out) {
}

@Override
public String toString() {
return outerHtml();
}

+ 34
- 20
server/src/org/jsoup/parser/CharacterReader.java View File

@@ -3,7 +3,7 @@ package org.jsoup.parser;
import org.jsoup.helper.Validate;

/**
CharacterReader consumes tokens off a string. To replace the old TokenQueue.
* CharacterReader consumes tokens off a string. To replace the old TokenQueue.
*/
class CharacterReader {
static final char EOF = (char) -1;
@@ -15,10 +15,11 @@ class CharacterReader {

CharacterReader(String input) {
Validate.notNull(input);
input = input.replaceAll("\r\n?", "\n"); // normalise carriage returns to newlines
input = input.replaceAll("\r\n?", "\n"); // normalise carriage returns
// to newlines

this.input = input;
this.length = input.length();
length = input.length();
}

int pos() {
@@ -87,8 +88,9 @@ class CharacterReader {
OUTER: while (!isEmpty()) {
char c = input.charAt(pos);
for (char seek : seq) {
if (seek == c)
if (seek == c) {
break OUTER;
}
}
pos++;
}
@@ -106,10 +108,11 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
pos++;
else
} else {
break;
}
}

return input.substring(start, pos);
@@ -119,17 +122,19 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
pos++;
else
} else {
break;
}
}
while (!isEmpty()) {
char c = input.charAt(pos);
if (c >= '0' && c <= '9')
if (c >= '0' && c <= '9') {
pos++;
else
} else {
break;
}
}

return input.substring(start, pos);
@@ -139,10 +144,12 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')
|| (c >= 'a' && c <= 'f')) {
pos++;
else
} else {
break;
}
}
return input.substring(start, pos);
}
@@ -151,10 +158,11 @@ class CharacterReader {
int start = pos;
while (!isEmpty()) {
char c = input.charAt(pos);
if (c >= '0' && c <= '9')
if (c >= '0' && c <= '9') {
pos++;
else
} else {
break;
}
}
return input.substring(start, pos);
}
@@ -173,27 +181,31 @@ class CharacterReader {
}

boolean matchesAny(char... seq) {
if (isEmpty())
if (isEmpty()) {
return false;
}

char c = input.charAt(pos);
for (char seek : seq) {
if (seek == c)
if (seek == c) {
return true;
}
}
return false;
}

boolean matchesLetter() {
if (isEmpty())
if (isEmpty()) {
return false;
}
char c = input.charAt(pos);
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

boolean matchesDigit() {
if (isEmpty())
if (isEmpty()) {
return false;
}
char c = input.charAt(pos);
return (c >= '0' && c <= '9');
}
@@ -217,10 +229,12 @@ class CharacterReader {
}

boolean containsIgnoreCase(String seq) {
// used to check presence of </title>, </style>. only finds consistent case.
// used to check presence of </title>, </style>. only finds consistent
// case.
String loScan = seq.toLowerCase();
String hiScan = seq.toUpperCase();
return (input.indexOf(loScan, pos) > -1) || (input.indexOf(hiScan, pos) > -1);
return (input.indexOf(loScan, pos) > -1)
|| (input.indexOf(hiScan, pos) > -1);
}

@Override

+ 184
- 102
server/src/org/jsoup/parser/HtmlTreeBuilder.java View File

@@ -1,15 +1,20 @@
package org.jsoup.parser;

import org.jsoup.helper.DescendableLinkedList;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.jsoup.helper.DescendableLinkedList;
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

/**
* HTML Tree Builder; creates a DOM from Tokens.
*/
@@ -21,15 +26,26 @@ class HtmlTreeBuilder extends TreeBuilder {
private boolean baseUriSetFromDoc = false;
private Element headElement; // the current head element
private Element formElement; // the current form element
private Element contextElement; // fragment parse context -- could be null even if fragment parsing
private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
private Element contextElement; // fragment parse context -- could be null
// even if fragment parsing
private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active
// (open)
// formatting
// elements
private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars
// in
// table
// to
// be
// shifted
// out

private boolean framesetOk = true; // if ok to go into frameset
private boolean fosterInserts = false; // if next inserts should be fostered
private boolean fragmentParsing = false; // if parsing a fragment of html

HtmlTreeBuilder() {}
HtmlTreeBuilder() {
}

@Override
Document parse(String input, String baseUri, ParseErrorList errors) {
@@ -37,7 +53,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return super.parse(input, baseUri, errors);
}

List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
List<Node> parseFragment(String inputFragment, Element context,
String baseUri, ParseErrorList errors) {
// context may be null
state = HtmlTreeBuilderState.Initial;
initialiseParse(inputFragment, baseUri, errors);
@@ -46,42 +63,48 @@ class HtmlTreeBuilder extends TreeBuilder {
Element root = null;

if (context != null) {
if (context.ownerDocument() != null) // quirks setup:
if (context.ownerDocument() != null) {
doc.quirksMode(context.ownerDocument().quirksMode());
}

// initialise the tokeniser state:
String contextTag = context.tagName();
if (StringUtil.in(contextTag, "title", "textarea"))
if (StringUtil.in(contextTag, "title", "textarea")) {
tokeniser.transition(TokeniserState.Rcdata);
else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
} else if (StringUtil.in(contextTag, "iframe", "noembed",
"noframes", "style", "xmp")) {
tokeniser.transition(TokeniserState.Rawtext);
else if (contextTag.equals("script"))
} else if (contextTag.equals("script")) {
tokeniser.transition(TokeniserState.ScriptData);
else if (contextTag.equals(("noscript")))
tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
else if (contextTag.equals("plaintext"))
} else if (contextTag.equals(("noscript"))) {
tokeniser.transition(TokeniserState.Data); // if scripting
// enabled, rawtext
} else if (contextTag.equals("plaintext")) {
tokeniser.transition(TokeniserState.Data);
else
} else {
tokeniser.transition(TokeniserState.Data); // default
}

root = new Element(Tag.valueOf("html"), baseUri);
doc.appendChild(root);
stack.push(root);
resetInsertionMode();
// todo: setup form element to nearest form on context (up ancestor chain)
// todo: setup form element to nearest form on context (up ancestor
// chain)
}

runParser();
if (context != null)
if (context != null) {
return root.childNodes();
else
} else {
return doc.childNodes();
}
}

@Override
protected boolean process(Token token) {
currentToken = token;
return this.state.process(token, this);
return state.process(token, this);
}

boolean process(Token token, HtmlTreeBuilderState state) {
@@ -122,14 +145,17 @@ class HtmlTreeBuilder extends TreeBuilder {
}

void maybeSetBaseUri(Element base) {
if (baseUriSetFromDoc) // only listen to the first <base href> in parse
if (baseUriSetFromDoc) {
return;
}

String href = base.absUrl("href");
if (href.length() != 0) { // ignore <base target> etc
baseUri = href;
baseUriSetFromDoc = true;
doc.setBaseUri(href); // set on the doc so doc.createElement(Tag) will get updated base, and to update all descendants
doc.setBaseUri(href); // set on the doc so doc.createElement(Tag)
// will get updated base, and to update all
// descendants
}
}

@@ -138,20 +164,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}

void error(HtmlTreeBuilderState state) {
if (errors.canAddError())
errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state));
if (errors.canAddError()) {
errors.add(new ParseError(reader.pos(),
"Unexpected token [%s] when in state [%s]", currentToken
.tokenType(), state));
}
}

Element insert(Token.StartTag startTag) {
// handle empty unknown tags
// when the spec expects an empty tag, will directly hit insertEmpty, so won't generate fake end tag.
// when the spec expects an empty tag, will directly hit insertEmpty, so
// won't generate fake end tag.
if (startTag.isSelfClosing() && !Tag.isKnownTag(startTag.name())) {
Element el = insertEmpty(startTag);
process(new Token.EndTag(el.tagName())); // ensure we get out of whatever state we are in
process(new Token.EndTag(el.tagName())); // ensure we get out of
// whatever state we are in
return el;
}
Element el = new Element(Tag.valueOf(startTag.name()), baseUri, startTag.attributes);

Element el = new Element(Tag.valueOf(startTag.name()), baseUri,
startTag.attributes);
insert(el);
return el;
}
@@ -173,8 +205,9 @@ class HtmlTreeBuilder extends TreeBuilder {
insertNode(el);
if (startTag.isSelfClosing()) {
tokeniser.acknowledgeSelfClosingFlag();
if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output
if (!tag.isKnownTag()) {
tag.setSelfClosing();
}
}
return el;
}
@@ -187,29 +220,37 @@ class HtmlTreeBuilder extends TreeBuilder {
void insert(Token.Character characterToken) {
Node node;
// characters in script and style go in as datanodes, not text nodes
if (StringUtil.in(currentElement().tagName(), "script", "style"))
if (StringUtil.in(currentElement().tagName(), "script", "style")) {
node = new DataNode(characterToken.getData(), baseUri);
else
} else {
node = new TextNode(characterToken.getData(), baseUri);
currentElement().appendChild(node); // doesn't use insertNode, because we don't foster these; and will always have a stack.
}
currentElement().appendChild(node); // doesn't use insertNode, because
// we don't foster these; and will
// always have a stack.
}

private void insertNode(Node node) {
// if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
if (stack.size() == 0)
// if the stack hasn't been set up yet, elements (doctype, comments) go
// into the doc
if (stack.size() == 0) {
doc.appendChild(node);
else if (isFosterInserts())
} else if (isFosterInserts()) {
insertInFosterParent(node);
else
} else {
currentElement().appendChild(node);
}
}

Element pop() {
// todo - dev, remove validation check
if (stack.peekLast().nodeName().equals("td") && !state.name().equals("InCell"))
if (stack.peekLast().nodeName().equals("td")
&& !state.name().equals("InCell")) {
Validate.isFalse(true, "pop td not in cell");
if (stack.peekLast().nodeName().equals("html"))
}
if (stack.peekLast().nodeName().equals("html")) {
Validate.isFalse(true, "popping html!");
}
return stack.pollLast();
}

@@ -225,7 +266,8 @@ class HtmlTreeBuilder extends TreeBuilder {
return isElementInQueue(stack, el);
}

private boolean isElementInQueue(DescendableLinkedList<Element> queue, Element element) {
private boolean isElementInQueue(DescendableLinkedList<Element> queue,
Element element) {
Iterator<Element> it = queue.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
@@ -313,10 +355,12 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
if (StringUtil.in(next.nodeName(), nodeNames) || next.nodeName().equals("html"))
if (StringUtil.in(next.nodeName(), nodeNames)
|| next.nodeName().equals("html")) {
break;
else
} else {
it.remove();
}
}
}

@@ -335,14 +379,15 @@ class HtmlTreeBuilder extends TreeBuilder {
void insertOnStackAfter(Element after, Element in) {
int i = stack.lastIndexOf(after);
Validate.isTrue(i != -1);
stack.add(i+1, in);
stack.add(i + 1, in);
}

void replaceOnStack(Element out, Element in) {
replaceInQueue(stack, out, in);
}

private void replaceInQueue(LinkedList<Element> queue, Element out, Element in) {
private void replaceInQueue(LinkedList<Element> queue, Element out,
Element in) {
int i = queue.lastIndexOf(out);
Validate.isTrue(i != -1);
queue.remove(i);
@@ -368,7 +413,8 @@ class HtmlTreeBuilder extends TreeBuilder {
} else if ("tr".equals(name)) {
transition(HtmlTreeBuilderState.InRow);
break;
} else if ("tbody".equals(name) || "thead".equals(name) || "tfoot".equals(name)) {
} else if ("tbody".equals(name) || "thead".equals(name)
|| "tfoot".equals(name)) {
transition(HtmlTreeBuilderState.InTableBody);
break;
} else if ("caption".equals(name)) {
@@ -400,28 +446,35 @@ class HtmlTreeBuilder extends TreeBuilder {
}

// todo: tidy up in specific scope methods
private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes) {
return inSpecificScope(new String[]{targetName}, baseTypes, extraTypes);
private boolean inSpecificScope(String targetName, String[] baseTypes,
String[] extraTypes) {
return inSpecificScope(new String[] { targetName }, baseTypes,
extraTypes);
}

private boolean inSpecificScope(String[] targetNames, String[] baseTypes, String[] extraTypes) {
private boolean inSpecificScope(String[] targetNames, String[] baseTypes,
String[] extraTypes) {
Iterator<Element> it = stack.descendingIterator();
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
if (StringUtil.in(elName, targetNames))
if (StringUtil.in(elName, targetNames)) {
return true;
if (StringUtil.in(elName, baseTypes))
}
if (StringUtil.in(elName, baseTypes)) {
return false;
if (extraTypes != null && StringUtil.in(elName, extraTypes))
}
if (extraTypes != null && StringUtil.in(elName, extraTypes)) {
return false;
}
}
Validate.fail("Should not be reachable");
return false;
}

boolean inScope(String[] targetNames) {
return inSpecificScope(targetNames, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, null);
return inSpecificScope(targetNames, new String[] { "applet", "caption",
"html", "table", "td", "th", "marquee", "object" }, null);
}

boolean inScope(String targetName) {
@@ -429,21 +482,23 @@ class HtmlTreeBuilder extends TreeBuilder {
}

boolean inScope(String targetName, String[] extras) {
return inSpecificScope(targetName, new String[]{"applet", "caption", "html", "table", "td", "th", "marquee", "object"}, extras);
return inSpecificScope(targetName, new String[] { "applet", "caption",
"html", "table", "td", "th", "marquee", "object" }, extras);
// todo: in mathml namespace: mi, mo, mn, ms, mtext annotation-xml
// todo: in svg namespace: forignOjbect, desc, title
}

boolean inListItemScope(String targetName) {
return inScope(targetName, new String[]{"ol", "ul"});
return inScope(targetName, new String[] { "ol", "ul" });
}

boolean inButtonScope(String targetName) {
return inScope(targetName, new String[]{"button"});
return inScope(targetName, new String[] { "button" });
}

boolean inTableScope(String targetName) {
return inSpecificScope(targetName, new String[]{"html", "table"}, null);
return inSpecificScope(targetName, new String[] { "html", "table" },
null);
}

boolean inSelectScope(String targetName) {
@@ -451,10 +506,12 @@ class HtmlTreeBuilder extends TreeBuilder {
while (it.hasNext()) {
Element el = it.next();
String elName = el.nodeName();
if (elName.equals(targetName))
if (elName.equals(targetName)) {
return true;
if (!StringUtil.in(elName, "optgroup", "option")) // all elements except
}
if (!StringUtil.in(elName, "optgroup", "option")) {
return false;
}
}
Validate.fail("Should not be reachable");
return false;
@@ -497,18 +554,26 @@ class HtmlTreeBuilder extends TreeBuilder {
}

/**
11.2.5.2 Closing elements that have implied end tags<p/>
When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a
dt element, an li element, an option element, an optgroup element, a p element, an rp element, or an rt element,
the UA must pop the current node off the stack of open elements.

@param excludeTag If a step requires the UA to generate implied end tags but lists an element to exclude from the
process, then the UA must perform the above steps as if that element was not in the above list.
* 11.2.5.2 Closing elements that have implied end tags
* <p/>
* When the steps below require the UA to generate implied end tags, then,
* while the current node is a dd element, a dt element, an li element, an
* option element, an optgroup element, a p element, an rp element, or an rt
* element, the UA must pop the current node off the stack of open elements.
*
* @param excludeTag
* If a step requires the UA to generate implied end tags but
* lists an element to exclude from the process, then the UA must
* perform the above steps as if that element was not in the
* above list.
*/
void generateImpliedEndTags(String excludeTag) {
while ((excludeTag != null && !currentElement().nodeName().equals(excludeTag)) &&
StringUtil.in(currentElement().nodeName(), "dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
while ((excludeTag != null && !currentElement().nodeName().equals(
excludeTag))
&& StringUtil.in(currentElement().nodeName(), "dd", "dt", "li",
"option", "optgroup", "p", "rp", "rt")) {
pop();
}
}

void generateImpliedEndTags() {
@@ -519,14 +584,18 @@ class HtmlTreeBuilder extends TreeBuilder {
// todo: mathml's mi, mo, mn
// todo: svg's foreigObject, desc, title
String name = el.nodeName();
return StringUtil.in(name, "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound",
"blockquote", "body", "br", "button", "caption", "center", "col", "colgroup", "command", "dd",
"details", "dir", "div", "dl", "dt", "embed", "fieldset", "figcaption", "figure", "footer", "form",
"frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
"iframe", "img", "input", "isindex", "li", "link", "listing", "marquee", "menu", "meta", "nav",
"noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext", "pre", "script",
"section", "select", "style", "summary", "table", "tbody", "td", "textarea", "tfoot", "th", "thead",
"title", "tr", "ul", "wbr", "xmp");
return StringUtil.in(name, "address", "applet", "area", "article",
"aside", "base", "basefont", "bgsound", "blockquote", "body",
"br", "button", "caption", "center", "col", "colgroup",
"command", "dd", "details", "dir", "div", "dl", "dt", "embed",
"fieldset", "figcaption", "figure", "footer", "form", "frame",
"frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head",
"header", "hgroup", "hr", "html", "iframe", "img", "input",
"isindex", "li", "link", "listing", "marquee", "menu", "meta",
"nav", "noembed", "noframes", "noscript", "object", "ol", "p",
"param", "plaintext", "pre", "script", "section", "select",
"style", "summary", "table", "tbody", "td", "textarea",
"tfoot", "th", "thead", "title", "tr", "ul", "wbr", "xmp");
}

// active formatting elements
@@ -534,12 +603,14 @@ class HtmlTreeBuilder extends TreeBuilder {
int numSeen = 0;
Iterator<Element> iter = formattingElements.descendingIterator();
while (iter.hasNext()) {
Element el = iter.next();
if (el == null) // marker
Element el = iter.next();
if (el == null) {
break;
}

if (isSameFormattingElement(in, el))
if (isSameFormattingElement(in, el)) {
numSeen++;
}

if (numSeen == 3) {
iter.remove();
@@ -550,17 +621,20 @@ class HtmlTreeBuilder extends TreeBuilder {
}

private boolean isSameFormattingElement(Element a, Element b) {
// same if: same namespace, tag, and attributes. Element.equals only checks tag, might in future check children
// same if: same namespace, tag, and attributes. Element.equals only
// checks tag, might in future check children
return a.nodeName().equals(b.nodeName()) &&
// a.namespace().equals(b.namespace()) &&
// a.namespace().equals(b.namespace()) &&
a.attributes().equals(b.attributes());
// todo: namespaces
}

void reconstructFormattingElements() {
int size = formattingElements.size();
if (size == 0 || formattingElements.getLast() == null || onStack(formattingElements.getLast()))
if (size == 0 || formattingElements.getLast() == null
|| onStack(formattingElements.getLast())) {
return;
}

Element entry = formattingElements.getLast();
int pos = size - 1;
@@ -570,18 +644,24 @@ class HtmlTreeBuilder extends TreeBuilder {
skip = true;
break;
}
entry = formattingElements.get(--pos); // step 5. one earlier than entry
if (entry == null || onStack(entry)) // step 6 - neither marker nor on stack
entry = formattingElements.get(--pos); // step 5. one earlier than
// entry
if (entry == null || onStack(entry)) {
break; // jump to 8, else continue back to 4
}
}
while(true) {
if (!skip) // step 7: on later than entry
while (true) {
if (!skip) {
entry = formattingElements.get(++pos);
Validate.notNull(entry); // should not occur, as we break at last element
}
Validate.notNull(entry); // should not occur, as we break at last
// element

// 8. create new element from element, 9 insert into current node, onto stack
// 8. create new element from element, 9 insert into current node,
// onto stack
skip = false; // can only skip increment from 4.
Element newEl = insert(entry.nodeName()); // todo: avoid fostering here?
Element newEl = insert(entry.nodeName()); // todo: avoid fostering
// here?
// newEl.namespace(entry.namespace()); // todo: namespaces
newEl.attributes().addAll(entry.attributes());

@@ -590,8 +670,9 @@ class HtmlTreeBuilder extends TreeBuilder {
formattingElements.remove(pos + 1);

// 11
if (pos == size-1) // if not last entry in list, jump to 7
if (pos == size - 1) {
break;
}
}
}

@@ -599,8 +680,9 @@ class HtmlTreeBuilder extends TreeBuilder {
while (!formattingElements.isEmpty()) {
Element el = formattingElements.peekLast();
formattingElements.removeLast();
if (el == null)
if (el == null) {
break;
}
}
}

@@ -623,10 +705,11 @@ class HtmlTreeBuilder extends TreeBuilder {
Iterator<Element> it = formattingElements.descendingIterator();
while (it.hasNext()) {
Element next = it.next();
if (next == null) // scope marker
if (next == null) {
break;
else if (next.nodeName().equals(nodeName))
} else if (next.nodeName().equals(nodeName)) {
return next;
}
}
return null;
}
@@ -647,26 +730,25 @@ class HtmlTreeBuilder extends TreeBuilder {
if (lastTable.parent() != null) {
fosterParent = lastTable.parent();
isLastTableParent = true;
} else
} else {
fosterParent = aboveOnStack(lastTable);
}
} else { // no table == frag
fosterParent = stack.get(0);
}

if (isLastTableParent) {
Validate.notNull(lastTable); // last table cannot be null by this point.
Validate.notNull(lastTable); // last table cannot be null by this
// point.
lastTable.before(in);
}
else
} else {
fosterParent.appendChild(in);
}
}

@Override
public String toString() {
return "TreeBuilder{" +
"currentToken=" + currentToken +
", state=" + state +
", currentElement=" + currentElement() +
'}';
return "TreeBuilder{" + "currentToken=" + currentToken + ", state="
+ state + ", currentElement=" + currentElement() + '}';
}
}

+ 917
- 728
server/src/org/jsoup/parser/HtmlTreeBuilderState.java
File diff suppressed because it is too large
View File


+ 5
- 2
server/src/org/jsoup/parser/ParseError.java View File

@@ -1,7 +1,8 @@
package org.jsoup.parser;

/**
* A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase.
* A Parse Error records an error in the input HTML that occurs in either the
* tokenisation or the tree building phase.
*/
public class ParseError {
private int pos;
@@ -13,12 +14,13 @@ public class ParseError {
}

ParseError(int pos, String errorFormat, Object... args) {
this.errorMsg = String.format(errorFormat, args);
errorMsg = String.format(errorFormat, args);
this.pos = pos;
}

/**
* Retrieve the error message.
*
* @return the error message.
*/
public String getErrorMessage() {
@@ -27,6 +29,7 @@ public class ParseError {

/**
* Retrieves the offset of the error.
*
* @return error offset within input
*/
public int getPosition() {

+ 4
- 4
server/src/org/jsoup/parser/ParseErrorList.java View File

@@ -7,15 +7,15 @@ import java.util.ArrayList;
*
* @author Jonathan Hedley
*/
class ParseErrorList extends ArrayList<ParseError>{
class ParseErrorList extends ArrayList<ParseError> {
private static final int INITIAL_CAPACITY = 16;
private final int maxSize;
ParseErrorList(int initialCapacity, int maxSize) {
super(initialCapacity);
this.maxSize = maxSize;
}
boolean canAddError() {
return size() < maxSize;
}
@@ -27,7 +27,7 @@ class ParseErrorList extends ArrayList<ParseError>{
static ParseErrorList noTracking() {
return new ParseErrorList(0, 0);
}
static ParseErrorList tracking(int maxSize) {
return new ParseErrorList(INITIAL_CAPACITY, maxSize);
}

+ 82
- 41
server/src/org/jsoup/parser/Parser.java View File

@@ -1,32 +1,36 @@
package org.jsoup.parser;

import java.util.List;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

import java.util.List;

/**
* Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use one of the more convenient parse methods
* in {@link org.jsoup.Jsoup}.
* Parses HTML into a {@link org.jsoup.nodes.Document}. Generally best to use
* one of the more convenient parse methods in {@link org.jsoup.Jsoup}.
*/
public class Parser {
private static final int DEFAULT_MAX_ERRORS = 0; // by default, error tracking is disabled.
private static final int DEFAULT_MAX_ERRORS = 0; // by default, error
// tracking is disabled.

private TreeBuilder treeBuilder;
private int maxErrors = DEFAULT_MAX_ERRORS;
private ParseErrorList errors;

/**
* Create a new Parser, using the specified TreeBuilder
* @param treeBuilder TreeBuilder to use to parse input into Documents.
*
* @param treeBuilder
* TreeBuilder to use to parse input into Documents.
*/
public Parser(TreeBuilder treeBuilder) {
this.treeBuilder = treeBuilder;
}
public Document parseInput(String html, String baseUri) {
errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors) : ParseErrorList.noTracking();
errors = isTrackErrors() ? ParseErrorList.tracking(maxErrors)
: ParseErrorList.noTracking();
Document doc = treeBuilder.parse(html, baseUri, errors);
return doc;
}
@@ -34,6 +38,7 @@ public class Parser {
// gets & sets
/**
* Get the TreeBuilder currently in use.
*
* @return current TreeBuilder.
*/
public TreeBuilder getTreeBuilder() {
@@ -42,7 +47,9 @@ public class Parser {

/**
* Update the TreeBuilder used when parsing content.
* @param treeBuilder current TreeBuilder
*
* @param treeBuilder
* current TreeBuilder
* @return this, for chaining
*/
public Parser setTreeBuilder(TreeBuilder treeBuilder) {
@@ -52,6 +59,7 @@ public class Parser {

/**
* Check if parse error tracking is enabled.
*
* @return current track error state.
*/
public boolean isTrackErrors() {
@@ -60,7 +68,9 @@ public class Parser {

/**
* Enable or disable parse error tracking for the next parse.
* @param maxErrors the maximum number of errors to track. Set to 0 to disable.
*
* @param maxErrors
* the maximum number of errors to track. Set to 0 to disable.
* @return this, for chaining
*/
public Parser setTrackErrors(int maxErrors) {
@@ -70,7 +80,9 @@ public class Parser {

/**
* Retrieve the parse errors, if any, from the last parse.
* @return list of parse errors, up to the size of the maximum errors tracked.
*
* @return list of parse errors, up to the size of the maximum errors
* tracked.
*/
public List<ParseError> getErrors() {
return errors;
@@ -79,10 +91,13 @@ public class Parser {
// static parse functions below
/**
* Parse HTML into a Document.
*
* @param html HTML to parse
* @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
*
*
* @param html
* HTML to parse
* @param baseUri
* base URI of document (i.e. original fetch location), for
* resolving relative URLs.
*
* @return parsed Document
*/
public static Document parse(String html, String baseUri) {
@@ -91,33 +106,49 @@ public class Parser {
}

/**
* Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing context.
*
* @param fragmentHtml the fragment of HTML to parse
* @param context (optional) the element that this HTML fragment is being parsed for (i.e. for inner HTML). This
* provides stack context (for implicit element creation).
* @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
*
* @return list of nodes parsed from the input HTML. Note that the context element, if supplied, is not modified.
* Parse a fragment of HTML into a list of nodes. The context element, if
* supplied, supplies parsing context.
*
* @param fragmentHtml
* the fragment of HTML to parse
* @param context
* (optional) the element that this HTML fragment is being parsed
* for (i.e. for inner HTML). This provides stack context (for
* implicit element creation).
* @param baseUri
* base URI of document (i.e. original fetch location), for
* resolving relative URLs.
*
* @return list of nodes parsed from the input HTML. Note that the context
* element, if supplied, is not modified.
*/
public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
public static List<Node> parseFragment(String fragmentHtml,
Element context, String baseUri) {
HtmlTreeBuilder treeBuilder = new HtmlTreeBuilder();
return treeBuilder.parseFragment(fragmentHtml, context, baseUri, ParseErrorList.noTracking());
return treeBuilder.parseFragment(fragmentHtml, context, baseUri,
ParseErrorList.noTracking());
}

/**
* Parse a fragment of HTML into the {@code body} of a Document.
*
* @param bodyHtml fragment of HTML
* @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
*
*
* @param bodyHtml
* fragment of HTML
* @param baseUri
* base URI of document (i.e. original fetch location), for
* resolving relative URLs.
*
* @return Document, with empty head, and HTML parsed into body
*/
public static Document parseBodyFragment(String bodyHtml, String baseUri) {
Document doc = Document.createShell(baseUri);
Element body = doc.body();
List<Node> nodeList = parseFragment(bodyHtml, body, baseUri);
Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented
Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node
// list gets
// modified
// when
// re-parented
for (Node node : nodes) {
body.appendChild(node);
}
@@ -125,21 +156,29 @@ public class Parser {
}

/**
* @param bodyHtml HTML to parse
* @param baseUri baseUri base URI of document (i.e. original fetch location), for resolving relative URLs.
*
* @param bodyHtml
* HTML to parse
* @param baseUri
* baseUri base URI of document (i.e. original fetch location),
* for resolving relative URLs.
*
* @return parsed Document
* @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment} instead.
* @deprecated Use {@link #parseBodyFragment} or {@link #parseFragment}
* instead.
*/
public static Document parseBodyFragmentRelaxed(String bodyHtml, String baseUri) {
@Deprecated
public static Document parseBodyFragmentRelaxed(String bodyHtml,
String baseUri) {
return parse(bodyHtml, baseUri);
}
// builders

/**
* Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalised document,
* based on a knowledge of the semantics of the incoming tags.
* Create a new HTML parser. This parser treats input as HTML5, and enforces
* the creation of a normalised document, based on a knowledge of the
* semantics of the incoming tags.
*
* @return a new HTML parser.
*/
public static Parser htmlParser() {
@@ -147,8 +186,10 @@ public class Parser {
}

/**
* Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it as HTML,
* rather creates a simple tree directly from the input.
* Create a new XML parser. This parser assumes no knowledge of the incoming
* tags and does not treat it as HTML, rather creates a simple tree directly
* from the input.
*
* @return a new simple XML parser.
*/
public static Parser xmlParser() {

+ 92
- 56
server/src/org/jsoup/parser/Tag.java View File

@@ -1,25 +1,31 @@
package org.jsoup.parser;

import org.jsoup.helper.Validate;

import java.util.HashMap;
import java.util.Map;

import org.jsoup.helper.Validate;

/**
* HTML Tag capabilities.
*
*
* @author Jonathan Hedley, jonathan@hedley.net
*/
public class Tag {
private static final Map<String, Tag> tags = new HashMap<String, Tag>(); // map of known tags
private static final Map<String, Tag> tags = new HashMap<String, Tag>(); // map
// of
// known
// tags

private String tagName;
private boolean isBlock = true; // block or inline
private boolean formatAsBlock = true; // should be formatted as a block
private boolean canContainBlock = true; // Can this tag hold block level tags?
private boolean canContainBlock = true; // Can this tag hold block level
// tags?
private boolean canContainInline = true; // only pcdata if not
private boolean empty = false; // can hold nothing; e.g. img
private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
private boolean selfClosing = false; // can self close (<foo />). used for
// unknown tags that self close,
// without forcing them as empty.
private boolean preserveWhitespace = false; // for pre, textarea, script etc

private Tag(String tagName) {
@@ -28,7 +34,7 @@ public class Tag {

/**
* Get this tag's name.
*
*
* @return the tag's name
*/
public String getName() {
@@ -36,11 +42,14 @@ public class Tag {
}

/**
* Get a Tag by name. If not previously defined (unknown), returns a new generic tag, that can do anything.
* Get a Tag by name. If not previously defined (unknown), returns a new
* generic tag, that can do anything.
* <p/>
* Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not registered and will only .equals().
*
* @param tagName Name of tag, e.g. "p". Case insensitive.
* Pre-defined tags (P, DIV etc) will be ==, but unknown tags are not
* registered and will only .equals().
*
* @param tagName
* Name of tag, e.g. "p". Case insensitive.
* @return The tag, either defined or new generic.
*/
public static Tag valueOf(String tagName) {
@@ -51,7 +60,8 @@ public class Tag {
synchronized (tags) {
Tag tag = tags.get(tagName);
if (tag == null) {
// not defined: create default; go anywhere, do anything! (incl be inside a <p>)
// not defined: create default; go anywhere, do anything! (incl
// be inside a <p>)
tag = new Tag(tagName);
tag.isBlock = false;
tag.canContainBlock = true;
@@ -62,7 +72,7 @@ public class Tag {

/**
* Gets if this is a block tag.
*
*
* @return if block tag
*/
public boolean isBlock() {
@@ -71,7 +81,7 @@ public class Tag {

/**
* Gets if this tag should be formatted as a block (or as inline)
*
*
* @return if should be formatted as block or inline
*/
public boolean formatAsBlock() {
@@ -80,7 +90,7 @@ public class Tag {

/**
* Gets if this tag can contain block tags.
*
*
* @return if tag can contain block tags
*/
public boolean canContainBlock() {
@@ -89,7 +99,7 @@ public class Tag {

/**
* Gets if this tag is an inline tag.
*
*
* @return if this tag is an inline tag.
*/
public boolean isInline() {
@@ -98,7 +108,7 @@ public class Tag {

/**
* Gets if this tag is a data only tag.
*
*
* @return if this tag is a data only tag
*/
public boolean isData() {
@@ -107,7 +117,7 @@ public class Tag {

/**
* Get if this is an empty tag
*
*
* @return if this is an empty tag
*/
public boolean isEmpty() {
@@ -116,7 +126,7 @@ public class Tag {

/**
* Get if this tag is self closing.
*
*
* @return if this tag should be output as self closing.
*/
public boolean isSelfClosing() {
@@ -125,7 +135,7 @@ public class Tag {

/**
* Get if this is a pre-defined tag, or was auto created on parsing.
*
*
* @return if a known tag
*/
public boolean isKnownTag() {
@@ -134,8 +144,9 @@ public class Tag {

/**
* Check if this tagname is a known tag.
*
* @param tagName name of tag
*
* @param tagName
* name of tag
* @return if known HTML tag
*/
public static boolean isKnownTag(String tagName) {
@@ -144,7 +155,7 @@ public class Tag {

/**
* Get if this tag should preserve whitespace within child text nodes.
*
*
* @return if preserve whitepace
*/
public boolean preserveWhitespace() {
@@ -158,19 +169,39 @@ public class Tag {

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof Tag)) return false;
if (this == o) {
return true;
}
if (!(o instanceof Tag)) {
return false;
}

Tag tag = (Tag) o;

if (canContainBlock != tag.canContainBlock) return false;
if (canContainInline != tag.canContainInline) return false;
if (empty != tag.empty) return false;
if (formatAsBlock != tag.formatAsBlock) return false;
if (isBlock != tag.isBlock) return false;
if (preserveWhitespace != tag.preserveWhitespace) return false;
if (selfClosing != tag.selfClosing) return false;
if (!tagName.equals(tag.tagName)) return false;
if (canContainBlock != tag.canContainBlock) {
return false;
}
if (canContainInline != tag.canContainInline) {
return false;
}
if (empty != tag.empty) {
return false;
}
if (formatAsBlock != tag.formatAsBlock) {
return false;
}
if (isBlock != tag.isBlock) {
return false;
}
if (preserveWhitespace != tag.preserveWhitespace) {
return false;
}
if (selfClosing != tag.selfClosing) {
return false;
}
if (!tagName.equals(tag.tagName)) {
return false;
}

return true;
}
@@ -188,34 +219,39 @@ public class Tag {
return result;
}

@Override
public String toString() {
return tagName;
}

// internal static initialisers:
// prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other sources
private static final String[] blockTags = {
"html", "head", "body", "frameset", "script", "noscript", "style", "meta", "link", "title", "frame",
"noframes", "section", "nav", "aside", "hgroup", "header", "footer", "p", "h1", "h2", "h3", "h4", "h5", "h6",
"ul", "ol", "pre", "div", "blockquote", "hr", "address", "figure", "figcaption", "form", "fieldset", "ins",
"del", "dl", "dt", "dd", "li", "table", "caption", "thead", "tfoot", "tbody", "colgroup", "col", "tr", "th",
"td", "video", "audio", "canvas", "details", "menu", "plaintext"
};
private static final String[] inlineTags = {
"object", "base", "font", "tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code", "samp", "kbd",
"var", "cite", "abbr", "time", "acronym", "mark", "ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q",
"sub", "sup", "bdo", "iframe", "embed", "span", "input", "select", "textarea", "label", "button", "optgroup",
"option", "legend", "datalist", "keygen", "output", "progress", "meter", "area", "param", "source", "track",
"summary", "command", "device"
};
private static final String[] emptyTags = {
"meta", "link", "base", "frame", "img", "br", "wbr", "embed", "hr", "input", "keygen", "col", "command",
"device"
};
private static final String[] formatAsInlineTags = {
"title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style"
};
private static final String[] preserveWhitespaceTags = {"pre", "plaintext", "title"};
// prepped from http://www.w3.org/TR/REC-html40/sgml/dtd.html and other
// sources
private static final String[] blockTags = { "html", "head", "body",
"frameset", "script", "noscript", "style", "meta", "link", "title",
"frame", "noframes", "section", "nav", "aside", "hgroup", "header",
"footer", "p", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol",
"pre", "div", "blockquote", "hr", "address", "figure",
"figcaption", "form", "fieldset", "ins", "del", "dl", "dt", "dd",
"li", "table", "caption", "thead", "tfoot", "tbody", "colgroup",
"col", "tr", "th", "td", "video", "audio", "canvas", "details",
"menu", "plaintext" };
private static final String[] inlineTags = { "object", "base", "font",
"tt", "i", "b", "u", "big", "small", "em", "strong", "dfn", "code",
"samp", "kbd", "var", "cite", "abbr", "time", "acronym", "mark",
"ruby", "rt", "rp", "a", "img", "br", "wbr", "map", "q", "sub",
"sup", "bdo", "iframe", "embed", "span", "input", "select",
"textarea", "label", "button", "optgroup", "option", "legend",
"datalist", "keygen", "output", "progress", "meter", "area",
"param", "source", "track", "summary", "command", "device" };
private static final String[] emptyTags = { "meta", "link", "base",
"frame", "img", "br", "wbr", "embed", "hr", "input", "keygen",
"col", "command", "device" };
private static final String[] formatAsInlineTags = { "title", "a", "p",
"h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th",
"td", "script", "style" };
private static final String[] preserveWhitespaceTags = { "pre",
"plaintext", "title" };

static {
// creates

+ 19
- 18
server/src/org/jsoup/parser/Token.java View File

@@ -12,7 +12,7 @@ abstract class Token {

private Token() {
}
String tokenType() {
return this.getClass().getSimpleName();
}
@@ -50,13 +50,16 @@ abstract class Token {
private String pendingAttributeValue;

boolean selfClosing = false;
Attributes attributes = new Attributes(); // todo: allow nodes to not have attributes
Attributes attributes = new Attributes(); // todo: allow nodes to not
// have attributes

void newAttribute() {
if (pendingAttributeName != null) {
if (pendingAttributeValue == null)
if (pendingAttributeValue == null) {
pendingAttributeValue = "";
Attribute attribute = new Attribute(pendingAttributeName, pendingAttributeValue);
}
Attribute attribute = new Attribute(pendingAttributeName,
pendingAttributeValue);
attributes.put(attribute);
}
pendingAttributeName = null;
@@ -85,12 +88,13 @@ abstract class Token {
return selfClosing;
}

@SuppressWarnings({"TypeMayBeWeakened"})
@SuppressWarnings({ "TypeMayBeWeakened" })
Attributes getAttributes() {
return attributes;
}

// these appenders are rarely hit in not null state-- caused by null chars.
// these appenders are rarely hit in not null state-- caused by null
// chars.
void appendTagName(String append) {
tagName = tagName == null ? append : tagName.concat(append);
}
@@ -100,7 +104,8 @@ abstract class Token {
}

void appendAttributeName(String append) {
pendingAttributeName = pendingAttributeName == null ? append : pendingAttributeName.concat(append);
pendingAttributeName = pendingAttributeName == null ? append
: pendingAttributeName.concat(append);
}

void appendAttributeName(char append) {
@@ -108,7 +113,8 @@ abstract class Token {
}

void appendAttributeValue(String append) {
pendingAttributeValue = pendingAttributeValue == null ? append : pendingAttributeValue.concat(append);
pendingAttributeValue = pendingAttributeValue == null ? append
: pendingAttributeValue.concat(append);
}

void appendAttributeValue(char append) {
@@ -124,12 +130,12 @@ abstract class Token {

StartTag(String name) {
this();
this.tagName = name;
tagName = name;
}

StartTag(String name, Attributes attributes) {
this();
this.tagName = name;
tagName = name;
this.attributes = attributes;
}

@@ -139,7 +145,7 @@ abstract class Token {
}
}

static class EndTag extends Tag{
static class EndTag extends Tag {
EndTag() {
super();
type = TokenType.EndTag;
@@ -147,7 +153,7 @@ abstract class Token {

EndTag(String name) {
this();
this.tagName = name;
tagName = name;
}

@Override
@@ -242,11 +248,6 @@ abstract class Token {
}

enum TokenType {
Doctype,
StartTag,
EndTag,
Comment,
Character,
EOF
Doctype, StartTag, EndTag, Comment, Character, EOF
}
}

+ 177
- 97
server/src/org/jsoup/parser/TokenQueue.java View File

@@ -5,18 +5,20 @@ import org.jsoup.helper.Validate;

/**
* A character queue with parsing helpers.
*
*
* @author Jonathan Hedley
*/
public class TokenQueue {
private String queue;
private int pos = 0;
private static final char ESC = '\\'; // escape char for chomp balanced.

/**
Create a new TokenQueue.
@param data string of data to back queue.
* Create a new TokenQueue.
*
* @param data
* string of data to back queue.
*/
public TokenQueue(String data) {
Validate.notNull(data);
@@ -25,18 +27,20 @@ public class TokenQueue {

/**
* Is the queue empty?
*
* @return true if no data left in queue.
*/
public boolean isEmpty() {
return remainingLength() == 0;
}
private int remainingLength() {
return queue.length() - pos;
}

/**
* Retrieves but does not remove the first character from the queue.
*
* @return First character, or 0 if empty.
*/
public char peek() {
@@ -44,16 +48,21 @@ public class TokenQueue {
}

/**
Add a character to the start of the queue (will be the next character retrieved).
@param c character to add
* Add a character to the start of the queue (will be the next character
* retrieved).
*
* @param c
* character to add
*/
public void addFirst(Character c) {
addFirst(c.toString());
}

/**
Add a string to the start of the queue.
@param seq string to add.
* Add a string to the start of the queue.
*
* @param seq
* string to add.
*/
public void addFirst(String seq) {
// not very performant, but an edge case
@@ -62,8 +71,11 @@ public class TokenQueue {
}

/**
* Tests if the next characters on the queue match the sequence. Case insensitive.
* @param seq String to check queue for.
* Tests if the next characters on the queue match the sequence. Case
* insensitive.
*
* @param seq
* String to check queue for.
* @return true if the next characters match.
*/
public boolean matches(String seq) {
@@ -72,47 +84,57 @@ public class TokenQueue {

/**
* Case sensitive match test.
* @param seq string to case sensitively check for
*
* @param seq
* string to case sensitively check for
* @return true if matched, false if not
*/
public boolean matchesCS(String seq) {
return queue.startsWith(seq, pos);
}

/**
Tests if the next characters match any of the sequences. Case insensitive.
@param seq list of strings to case insensitively check for
@return true if any matched, false if none did
* Tests if the next characters match any of the sequences. Case
* insensitive.
*
* @param seq
* list of strings to case insensitively check for
* @return true if any matched, false if none did
*/
public boolean matchesAny(String... seq) {
for (String s : seq) {
if (matches(s))
if (matches(s)) {
return true;
}
}
return false;
}

public boolean matchesAny(char... seq) {
if (isEmpty())
if (isEmpty()) {
return false;
}

for (char c: seq) {
if (queue.charAt(pos) == c)
for (char c : seq) {
if (queue.charAt(pos) == c) {
return true;
}
}
return false;
}

public boolean matchesStartTag() {
// micro opt for matching "<x"
return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character.isLetter(queue.charAt(pos+1)));
return (remainingLength() >= 2 && queue.charAt(pos) == '<' && Character
.isLetter(queue.charAt(pos + 1)));
}

/**
* Tests if the queue matches the sequence (as with match), and if they do, removes the matched string from the
* queue.
* @param seq String to search for, and if found, remove from queue.
* Tests if the queue matches the sequence (as with match), and if they do,
* removes the matched string from the queue.
*
* @param seq
* String to search for, and if found, remove from queue.
* @return true if found and removed, false if not found.
*/
public boolean matchChomp(String seq) {
@@ -125,16 +147,18 @@ public class TokenQueue {
}

/**
Tests if queue starts with a whitespace character.
@return if starts with whitespace
* Tests if queue starts with a whitespace character.
*
* @return if starts with whitespace
*/
public boolean matchesWhitespace() {
return !isEmpty() && StringUtil.isWhitespace(queue.charAt(pos));
}

/**
Test if the queue matches a word character (letter or digit).
@return if matches a word character
* Test if the queue matches a word character (letter or digit).
*
* @return if matches a word character
*/
public boolean matchesWord() {
return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
@@ -144,11 +168,14 @@ public class TokenQueue {
* Drops the next character off the queue.
*/
public void advance() {
if (!isEmpty()) pos++;
if (!isEmpty()) {
pos++;
}
}

/**
* Consume one character off queue.
*
* @return first character on queue.
*/
public char consume() {
@@ -156,25 +183,36 @@ public class TokenQueue {
}

/**
* Consumes the supplied sequence of the queue. If the queue does not start with the supplied sequence, will
* throw an illegal state exception -- but you should be running match() against that condition.
<p>
Case insensitive.
* @param seq sequence to remove from head of queue.
* Consumes the supplied sequence of the queue. If the queue does not start
* with the supplied sequence, will throw an illegal state exception -- but
* you should be running match() against that condition.
* <p>
* Case insensitive.
*
* @param seq
* sequence to remove from head of queue.
*/
public void consume(String seq) {
if (!matches(seq))
throw new IllegalStateException("Queue did not match expected sequence");
if (!matches(seq)) {
throw new IllegalStateException(
"Queue did not match expected sequence");
}
int len = seq.length();
if (len > remainingLength())
throw new IllegalStateException("Queue not long enough to consume sequence");
if (len > remainingLength()) {
throw new IllegalStateException(
"Queue not long enough to consume sequence");
}

pos += len;
}

/**
* Pulls a string off the queue, up to but exclusive of the match sequence, or to the queue running out.
* @param seq String to end on (and not include in return, but leave on queue). <b>Case sensitive.</b>
* Pulls a string off the queue, up to but exclusive of the match sequence,
* or to the queue running out.
*
* @param seq
* String to end on (and not include in return, but leave on
* queue). <b>Case sensitive.</b>
* @return The matched data consumed from queue.
*/
public String consumeTo(String seq) {
@@ -187,38 +225,52 @@ public class TokenQueue {
return remainder();
}
}
public String consumeToIgnoreCase(String seq) {
int start = pos;
String first = seq.substring(0, 1);
boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if first is not cased, use index of
boolean canScan = first.toLowerCase().equals(first.toUpperCase()); // if
// first
// is
// not
// cased,
// use
// index
// of
while (!isEmpty()) {
if (matches(seq))
if (matches(seq)) {
break;
}

if (canScan) {
int skip = queue.indexOf(first, pos) - pos;
if (skip == 0) // this char is the skip char, but not match, so force advance of pos
if (skip == 0) {
pos++;
else if (skip < 0) // no chance of finding, grab to end
} else if (skip < 0) {
pos = queue.length();
else
} else {
pos += skip;
}
else
}
} else {
pos++;
}
}

String data = queue.substring(start, pos);
return data;
String data = queue.substring(start, pos);
return data;
}

/**
Consumes to the first sequence provided, or to the end of the queue. Leaves the terminator on the queue.
@param seq any number of terminators to consume to. <b>Case insensitive.</b>
@return consumed string
* Consumes to the first sequence provided, or to the end of the queue.
* Leaves the terminator on the queue.
*
* @param seq
* any number of terminators to consume to. <b>Case
* insensitive.</b>
* @return consumed string
*/
// todo: method name. not good that consumeTo cares for case, and consume to any doesn't. And the only use for this
// todo: method name. not good that consumeTo cares for case, and consume to
// any doesn't. And the only use for this
// is a case sensitive time...
public String consumeToAny(String... seq) {
int start = pos;
@@ -226,16 +278,20 @@ public class TokenQueue {
pos++;
}

String data = queue.substring(start, pos);
return data;
String data = queue.substring(start, pos);
return data;
}

/**
* Pulls a string off the queue (like consumeTo), and then pulls off the matched string (but does not return it).
* Pulls a string off the queue (like consumeTo), and then pulls off the
* matched string (but does not return it).
* <p>
* If the queue runs out of characters before finding the seq, will return as much as it can (and queue will go
* isEmpty() == true).
* @param seq String to match up to, and not include in return, and to pull off queue. <b>Case sensitive.</b>
* If the queue runs out of characters before finding the seq, will return
* as much as it can (and queue will go isEmpty() == true).
*
* @param seq
* String to match up to, and not include in return, and to pull
* off queue. <b>Case sensitive.</b>
* @return Data matched from queue.
*/
public String chompTo(String seq) {
@@ -243,7 +299,7 @@ public class TokenQueue {
matchChomp(seq);
return data;
}
public String chompToIgnoreCase(String seq) {
String data = consumeToIgnoreCase(seq); // case insensitive scan
matchChomp(seq);
@@ -251,12 +307,17 @@ public class TokenQueue {
}

/**
* Pulls a balanced string off the queue. E.g. if queue is "(one (two) three) four", (,) will return "one (two) three",
* and leave " four" on the queue. Unbalanced openers and closers can be escaped (with \). Those escapes will be left
* in the returned string, which is suitable for regexes (where we need to preserve the escape), but unsuitable for
* Pulls a balanced string off the queue. E.g. if queue is
* "(one (two) three) four", (,) will return "one (two) three", and leave
* " four" on the queue. Unbalanced openers and closers can be escaped (with
* \). Those escapes will be left in the returned string, which is suitable
* for regexes (where we need to preserve the escape), but unsuitable for
* contains text strings; use unescape for that.
* @param open opener
* @param close closer
*
* @param open
* opener
* @param close
* closer
* @return data matched from the queue
*/
public String chompBalanced(char open, char close) {
@@ -265,25 +326,32 @@ public class TokenQueue {
char last = 0;

do {
if (isEmpty()) break;
if (isEmpty()) {
break;
}
Character c = consume();
if (last == 0 || last != ESC) {
if (c.equals(open))
if (c.equals(open)) {
depth++;
else if (c.equals(close))
} else if (c.equals(close)) {
depth--;
}
}

if (depth > 0 && last != 0)
accum.append(c); // don't include the outer match pair in the return
if (depth > 0 && last != 0) {
accum.append(c); // don't include the outer match pair in the
// return
}
last = c;
} while (depth > 0);
return accum.toString();
}
/**
* Unescape a \ escaped string.
* @param in backslash escaped string
*
* @param in
* backslash escaped string
* @return unescaped string
*/
public static String unescape(String in) {
@@ -291,11 +359,12 @@ public class TokenQueue {
char last = 0;
for (char c : in.toCharArray()) {
if (c == ESC) {
if (last != 0 && last == ESC)
if (last != 0 && last == ESC) {
out.append(c);
}
else
}
} else {
out.append(c);
}
last = c;
}
return out.toString();
@@ -315,15 +384,17 @@ public class TokenQueue {

/**
* Retrieves the next run of word type (letter or digit) off the queue.
*
* @return String of word characters from queue, or empty string if none.
*/
public String consumeWord() {
int start = pos;
while (matchesWord())
while (matchesWord()) {
pos++;
}
return queue.substring(start, pos);
}
/**
* Consume a tag name off the queue (word or :, _, -)
*
@@ -331,53 +402,61 @@ public class TokenQueue {
*/
public String consumeTagName() {
int start = pos;
while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-')))
while (!isEmpty() && (matchesWord() || matchesAny(':', '_', '-'))) {
pos++;
}

return queue.substring(start, pos);
}
/**
* Consume a CSS element selector (tag name, but | instead of : for namespaces, to not conflict with :pseudo selects).
* Consume a CSS element selector (tag name, but | instead of : for
* namespaces, to not conflict with :pseudo selects).
*
* @return tag name
*/
public String consumeElementSelector() {
int start = pos;
while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-')))
while (!isEmpty() && (matchesWord() || matchesAny('|', '_', '-'))) {
pos++;
}

return queue.substring(start, pos);
}

/**
Consume a CSS identifier (ID or class) off the queue (letter, digit, -, _)
http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
@return identifier
* Consume a CSS identifier (ID or class) off the queue (letter, digit, -,
* _) http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
*
* @return identifier
*/
public String consumeCssIdentifier() {
int start = pos;
while (!isEmpty() && (matchesWord() || matchesAny('-', '_')))
while (!isEmpty() && (matchesWord() || matchesAny('-', '_'))) {
pos++;
}

return queue.substring(start, pos);
}

/**
Consume an attribute key off the queue (letter, digit, -, _, :)
@return attribute key
* Consume an attribute key off the queue (letter, digit, -, _, :)
*
* @return attribute key
*/
public String consumeAttributeKey() {
int start = pos;
while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':')))
while (!isEmpty() && (matchesWord() || matchesAny('-', '_', ':'))) {
pos++;
}

return queue.substring(start, pos);
}

/**
Consume and return whatever is left on the queue.
@return remainder of queue.
* Consume and return whatever is left on the queue.
*
* @return remainder of queue.
*/
public String remainder() {
StringBuilder accum = new StringBuilder();
@@ -386,7 +465,8 @@ public class TokenQueue {
}
return accum.toString();
}

@Override
public String toString() {
return queue.substring(pos);
}

+ 69
- 35
server/src/org/jsoup/parser/Tokeniser.java View File

@@ -3,9 +3,6 @@ package org.jsoup.parser;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.Entities;

import java.util.ArrayList;
import java.util.List;

/**
* Reads the input stream into tokens.
*/
@@ -15,16 +12,21 @@ class Tokeniser {
private CharacterReader reader; // html input
private ParseErrorList errors; // errors found while tokenising

private TokeniserState state = TokeniserState.Data; // current tokenisation state
private TokeniserState state = TokeniserState.Data; // current tokenisation
// state
private Token emitPending; // the token we are about to emit on next read
private boolean isEmitPending = false;
private StringBuilder charBuffer = new StringBuilder(); // buffers characters to output as one token
private StringBuilder charBuffer = new StringBuilder(); // buffers
// characters to
// output as one
// token
StringBuilder dataBuffer; // buffers data looking for </script>

Token.Tag tagPending; // tag we are building up
Token.Doctype doctypePending; // doctype building up
Token.Comment commentPending; // comment building up
private Token.StartTag lastStartTag; // the last start tag emitted, to test appropriate end tag
private Token.StartTag lastStartTag; // the last start tag emitted, to test
// appropriate end tag
private boolean selfClosingFlagAcknowledged = true;

Tokeniser(CharacterReader reader, ParseErrorList errors) {
@@ -38,10 +40,12 @@ class Tokeniser {
selfClosingFlagAcknowledged = true;
}

while (!isEmitPending)
while (!isEmitPending) {
state.read(this, reader);
}

// if emit is pending, a non-character token was found: return any chars in buffer, and leave token for next read:
// if emit is pending, a non-character token was found: return any chars
// in buffer, and leave token for next read:
if (charBuffer.length() > 0) {
String str = charBuffer.toString();
charBuffer.delete(0, charBuffer.length());
@@ -61,17 +65,20 @@ class Tokeniser {
if (token.type == Token.TokenType.StartTag) {
Token.StartTag startTag = (Token.StartTag) token;
lastStartTag = startTag;
if (startTag.selfClosing)
if (startTag.selfClosing) {
selfClosingFlagAcknowledged = false;
}
} else if (token.type == Token.TokenType.EndTag) {
Token.EndTag endTag = (Token.EndTag) token;
if (endTag.attributes.size() > 0)
if (endTag.attributes.size() > 0) {
error("Attributes incorrectly present on end tag");
}
}
}

void emit(String str) {
// buffer strings up until last string token found, to emit only one token for a run of character refs etc.
// buffer strings up until last string token found, to emit only one
// token for a run of character refs etc.
// does not set isEmitPending; read checks that
charBuffer.append(str);
}
@@ -97,32 +104,40 @@ class Tokeniser {
selfClosingFlagAcknowledged = true;
}

Character consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) {
if (reader.isEmpty())
Character consumeCharacterReference(Character additionalAllowedCharacter,
boolean inAttribute) {
if (reader.isEmpty()) {
return null;
if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current())
}
if (additionalAllowedCharacter != null
&& additionalAllowedCharacter == reader.current()) {
return null;
if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&'))
}
if (reader.matchesAny('\t', '\n', '\f', ' ', '<', '&')) {
return null;
}

reader.mark();
if (reader.matchConsume("#")) { // numbered
boolean isHexMode = reader.matchConsumeIgnoreCase("X");
String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence();
String numRef = isHexMode ? reader.consumeHexSequence() : reader
.consumeDigitSequence();
if (numRef.length() == 0) { // didn't match anything
characterReferenceError("numeric reference with no numerals");
reader.rewindToMark();
return null;
}
if (!reader.matchConsume(";"))
if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon"); // missing semi
}
int charval = -1;
try {
int base = isHexMode ? 16 : 10;
charval = Integer.valueOf(numRef, base);
} catch (NumberFormatException e) {
} // skip
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF) || charval > 0x10FFFF) {
if (charval == -1 || (charval >= 0xD800 && charval <= 0xDFFF)
|| charval > 0x10FFFF) {
characterReferenceError("character outside of valid range");
return replacementChar;
} else {
@@ -131,32 +146,40 @@ class Tokeniser {
return (char) charval;
}
} else { // named
// get as many letters as possible, and look for matching entities. unconsume backwards till a match is found
// get as many letters as possible, and look for matching entities.
// unconsume backwards till a match is found
String nameRef = reader.consumeLetterThenDigitSequence();
String origNameRef = new String(nameRef); // for error reporting. nameRef gets chomped looking for matches
String origNameRef = new String(nameRef); // for error reporting.
// nameRef gets chomped
// looking for matches
boolean looksLegit = reader.matches(';');
boolean found = false;
while (nameRef.length() > 0 && !found) {
if (Entities.isNamedEntity(nameRef))
if (Entities.isNamedEntity(nameRef)) {
found = true;
else {
nameRef = nameRef.substring(0, nameRef.length()-1);
} else {
nameRef = nameRef.substring(0, nameRef.length() - 1);
reader.unconsume();
}
}
if (!found) {
if (looksLegit) // named with semicolon
characterReferenceError(String.format("invalid named referenece '%s'", origNameRef));
if (looksLegit) {
characterReferenceError(String.format(
"invalid named referenece '%s'", origNameRef));
}
reader.rewindToMark();
return null;
}
if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
if (inAttribute
&& (reader.matchesLetter() || reader.matchesDigit() || reader
.matchesAny('=', '-', '_'))) {
// don't want that to match
reader.rewindToMark();
return null;
}
if (!reader.matchConsume(";"))
if (!reader.matchConsume(";")) {
characterReferenceError("missing semicolon"); // missing semi
}
return Entities.getCharacterByName(nameRef);
}
}
@@ -192,8 +215,9 @@ class Tokeniser {
}

boolean isAppropriateEndTagToken() {
if (lastStartTag == null)
if (lastStartTag == null) {
return false;
}
return tagPending.tagName.equals(lastStartTag.tagName);
}

@@ -202,23 +226,33 @@ class Tokeniser {
}

void error(TokeniserState state) {
if (errors.canAddError())
errors.add(new ParseError(reader.pos(), "Unexpected character '%s' in input state [%s]", reader.current(), state));
if (errors.canAddError()) {
errors.add(new ParseError(reader.pos(),
"Unexpected character '%s' in input state [%s]", reader
.current(), state));
}
}

void eofError(TokeniserState state) {
if (errors.canAddError())
errors.add(new ParseError(reader.pos(), "Unexpectedly reached end of file (EOF) in input state [%s]", state));
if (errors.canAddError()) {
errors.add(new ParseError(
reader.pos(),
"Unexpectedly reached end of file (EOF) in input state [%s]",
state));
}
}

private void characterReferenceError(String message) {
if (errors.canAddError())
errors.add(new ParseError(reader.pos(), "Invalid character reference: %s", message));
if (errors.canAddError()) {
errors.add(new ParseError(reader.pos(),
"Invalid character reference: %s", message));
}
}

private void error(String errorMsg) {
if (errors.canAddError())
if (errors.canAddError()) {
errors.add(new ParseError(reader.pos(), errorMsg));
}
}

boolean currentNodeInHtmlNS() {

+ 1177
- 1085
server/src/org/jsoup/parser/TokeniserState.java
File diff suppressed because it is too large
View File


+ 0
- 0
server/src/org/jsoup/parser/TreeBuilder.java View File


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save