1 files changed, 239 insertions, 206 deletions
diff --git a/src/java/org/apache/fop/fo/FOText.java b/src/java/org/apache/fop/fo/FOText.java
index f21386075..217997ace 100644
--- a/src/java/org/apache/fop/fo/FOText.java
+++ b/src/java/org/apache/fop/fo/FOText.java
@@ -20,6 +20,7 @@
 package org.apache.fop.fo;
 
 import java.awt.Color;
+import java.nio.CharBuffer;
 import java.util.NoSuchElementException;
 
 import org.xml.sax.Locator;
@@ -33,48 +34,17 @@ import org.apache.fop.fo.properties.CommonTextDecoration;
 import org.apache.fop.fo.properties.KeepProperty;
 import org.apache.fop.fo.properties.Property;
 import org.apache.fop.fo.properties.SpaceProperty;
+import org.apache.fop.util.CharUtilities;
 
 /**
  * A text node (PCDATA) in the formatting object tree.
- *
- * Unfortunately the BufferManager implementation holds
- * onto references to the character data in this object
- * longer than the lifetime of the object itself, causing
- * excessive memory consumption and OOM errors.
  */
-public class FOText extends FONode {
+public class FOText extends FONode implements CharSequence {
 
-    /**
-     * the character array containing the text
-     */
-    public char[] ca;
-
-    /**
-     * The starting valid index of the ca array 
-     * to be processed.
-     *
-     * This value is originally equal to 0, but becomes 
-     * incremented during leading whitespace removal by the flow.Block class,  
-     * via the TextCharIterator.remove() method below.
-     */
-    public int startIndex = 0;
+    /** the <code>CharBuffer</code> containing the text */
+    private CharBuffer charBuffer;
 
-    /**
-     * The ending valid index of the ca array 
-     * to be processed.
-     *
-     * This value is originally equal to ca.length, but becomes 
-     * decremented during between-word whitespace removal by the 
-     * XMLWhiteSpaceHandler via the TextCharIterator.remove() 
-     * method below.
-     */
-    public int endIndex = 0;
-
-    /** properties relevant for PCDATA */
-    /* TODO: these are basically always the same as the parent FObj or FObjMixed
-     *      so maybe those can be removed, and the accessors could  
-     *      dispatch the call to the parent?
-     */
+    /** properties relevant for #PCDATA */
     private CommonFont commonFont;
     private CommonHyphenation commonHyphenation;
     private Color color;
@@ -105,10 +75,10 @@ public class FOText extends FONode {
      * which FOText nodes are descendants of the same block.
      */
     private Block ancestorBlock = null;
-    
+
     /** Holds the text decoration values. May be null */
     private CommonTextDecoration textDecoration;
-    
+
     private static final int IS_WORD_CHAR_FALSE = 0;
     private static final int IS_WORD_CHAR_TRUE = 1;
     private static final int IS_WORD_CHAR_MAYBE = 2;
@@ -123,37 +93,61 @@ public class FOText extends FONode {
     }
 
     /** {@inheritDoc} */
-    protected void addCharacters(char[] data, int start, int end,
+    protected void addCharacters(char[] data, int start, int length,
             PropertyList list, Locator locator) throws FOPException {
 
-        int length = end - start;
-        int calength = 0;
-        char[] nca;
-        if (ca != null) {
-            calength = ca.length;
-            nca = new char[calength + length];
-            System.arraycopy(ca, 0, nca, 0, calength);
+        if (this.charBuffer == null) {
+            // buffer not yet initialized, do so now
+            this.charBuffer = CharBuffer.allocate(length);
         } else {
-            nca = new char[length];
+            // allocate a larger buffer, and transfer contents
+            int newLength = this.charBuffer.limit() + length;
+            CharBuffer newBuffer = CharBuffer.allocate(newLength);
+            this.charBuffer.rewind();
+            newBuffer.put(this.charBuffer);
+            this.charBuffer = newBuffer;
         }
-        System.arraycopy(data, start, nca, calength, length);
-        endIndex = nca.length;
-        this.ca = nca;
-     }
+        // append characters
+        this.charBuffer.put(data, start, length);
+
+    }
 
     /**
-     * {@inheritDoc} 
+     * Return the array of characters for this instance.
+     *
+     * @return  a char array containing the text
      */
+    public char[] getCharArray() {
+
+        if (this.charBuffer == null) {
+            return null;
+        }
+
+        if (this.charBuffer.hasArray()) {
+            return this.charBuffer.array();
+        }
+
+        // only if the buffer implementation has
+        // no accessible backing array, return a new one
+        char[] ca = new char[this.charBuffer.limit()];
+        this.charBuffer.rewind();
+        this.charBuffer.get(ca);
+        return ca;
+
+    }
+
+    /** {@inheritDoc} */
     public FONode clone(FONode parent, boolean removeChildren)
             throws FOPException {
         FOText ft = (FOText) super.clone(parent, removeChildren);
         if (removeChildren) {
-            //not really removing, but just make sure the char array 
-            //pointed to is really a different one, and reset any
-            //possible whitespace-handling effects
-            if (ca != null) {
-                ft.ca = new char[ca.length];
-                System.arraycopy(ca, 0, ft.ca, 0, ca.length);
+            // not really removing, just make sure the char buffer
+            // pointed to is really a different one
+            if (this.charBuffer != null) {
+                ft.charBuffer = CharBuffer.allocate(this.charBuffer.limit());
+                this.charBuffer.rewind();
+                ft.charBuffer.put(this.charBuffer);
+                ft.charBuffer.rewind();
             }
         }
         ft.prevFOTextThisBlock = null;
@@ -162,29 +156,33 @@ public class FOText extends FONode {
         return ft;
     }
 
-    /**
-     * {@inheritDoc}
-     */
+    /** {@inheritDoc} */
     public void bind(PropertyList pList) throws FOPException {
-        commonFont = pList.getFontProps();
-        commonHyphenation = pList.getHyphenationProps();
-        color = pList.get(Constants.PR_COLOR).getColor(getUserAgent());
-        keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep();
-        lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace();
-        letterSpacing = pList.get(Constants.PR_LETTER_SPACING);
-        whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum();
-        whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum();
-        textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum();
-        wordSpacing = pList.get(Constants.PR_WORD_SPACING);
-        wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum();
-        textDecoration = pList.getTextDecorationProps();
-        baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength();
+        this.commonFont = pList.getFontProps();
+        this.commonHyphenation = pList.getHyphenationProps();
+        this.color = pList.get(Constants.PR_COLOR).getColor(getUserAgent());
+        this.keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep();
+        this.lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace();
+        this.letterSpacing = pList.get(Constants.PR_LETTER_SPACING);
+        this.whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum();
+        this.whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum();
+        this.textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum();
+        this.wordSpacing = pList.get(Constants.PR_WORD_SPACING);
+        this.wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum();
+        this.textDecoration = pList.getTextDecorationProps();
+        this.baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength();
     }
 
     /** {@inheritDoc} */
     protected void endOfNode() throws FOPException {
+        super.endOfNode();
+        getFOEventHandler().characters(
+                this.getCharArray(), 0, this.charBuffer.limit());
+    }
+
+    /** {@inheritDoc} */
+    public void finalizeNode() {
         textTransform();
-        getFOEventHandler().characters(ca, startIndex, endIndex);
     }
 
     /**
@@ -198,16 +196,20 @@ public class FOText extends FONode {
      */
     public boolean willCreateArea() {
         if (whiteSpaceCollapse == Constants.EN_FALSE
-                && endIndex - startIndex > 0) {
+                && this.charBuffer.limit() > 0) {
             return true;
         }
 
-        for (int i = startIndex; i < endIndex; i++) {
-            char ch = ca[i];
-            if (!((ch == ' ')
-                    || (ch == '\n')
-                    || (ch == '\r')
-                    || (ch == '\t'))) { // whitespace
+        char ch;
+        this.charBuffer.rewind();
+        while (this.charBuffer.hasRemaining()) {
+            ch = this.charBuffer.get();
+            if (!((ch == CharUtilities.SPACE)
+                    || (ch == CharUtilities.LINEFEED_CHAR)
+                    || (ch == CharUtilities.CARRIAGE_RETURN)
+                    || (ch == CharUtilities.TAB))) {
+                // not whitespace
+                this.charBuffer.rewind();
                 return true;
             }
         }
@@ -222,7 +224,7 @@ public class FOText extends FONode {
     }
 
      /**
-     * This method is run as part of the ancestor Block's flushText(), to 
+     * This method is run as part of the ancestor Block's flushText(), to
      * create xref pointers to the previous FOText objects within the same Block
      * @param  ancestorBlock the ancestor fo:block
      */
@@ -230,7 +232,7 @@ public class FOText extends FONode {
         this.ancestorBlock = ancestorBlock;
         // if the last FOText is a sibling, point to it, and have it point here
         if (ancestorBlock.lastFOTextProcessed != null) {
-            if (ancestorBlock.lastFOTextProcessed.ancestorBlock 
+            if (ancestorBlock.lastFOTextProcessed.ancestorBlock
                     == this.ancestorBlock) {
                 prevFOTextThisBlock = ancestorBlock.lastFOTextProcessed;
                 prevFOTextThisBlock.nextFOTextThisBlock = this;
@@ -241,16 +243,47 @@ public class FOText extends FONode {
     }
 
     /**
-     * This method is run as part of the Constructor, to handle the
-     * text-transform property.
+     * This method is run as part of endOfNode(), to handle the
+     * text-transform property for accumulated FOText
      */
     private void textTransform() {
-        if (getFOEventHandler().inMarker() 
+        if (getBuilderContext().inMarker()
                 || textTransform == Constants.EN_NONE) {
             return;
         }
-        for (int i = 0; i < endIndex; i++) {
-            ca[i] = charTransform(i);
+
+        this.charBuffer.rewind();
+        CharBuffer tmp = this.charBuffer.slice();
+        char c;
+        int lim = this.charBuffer.limit();
+        int pos = -1;
+        while (++pos < lim) {
+            c = this.charBuffer.get();
+            switch (textTransform) {
+                case Constants.EN_UPPERCASE:
+                    tmp.put(Character.toUpperCase(c));
+                    break;
+                case Constants.EN_LOWERCASE:
+                    tmp.put(Character.toLowerCase(c));
+                    break;
+                case Constants.EN_CAPITALIZE:
+                    if (isStartOfWord(pos)) {
+                        /*
+                         Use toTitleCase here. Apparently, some languages use
+                         a different character to represent a letter when using
+                         initial caps than when all of the letters in the word
+                         are capitalized. We will try to let Java handle this.
+                        */
+                        tmp.put(Character.toTitleCase(c));
+                    } else {
+                        tmp.put(c);
+                    }
+                    break;
+                default:
+                     //should never happen as the property subsystem catches that case
+                    assert false;
+                    //nop
+            }
         }
     }
 
@@ -261,7 +294,7 @@ public class FOText extends FONode {
      * well, such as word-spacing. The definition of "word" is somewhat ambiguous
      * and appears to be definable by the user agent.
      *
-     * @param i index into ca[]
+     * @param i index into charBuffer
      *
      * @return True if the character at this location is the start of a new
      * word.
@@ -269,33 +302,33 @@ public class FOText extends FONode {
     private boolean isStartOfWord(int i) {
         char prevChar = getRelativeCharInBlock(i, -1);
         /* All we are really concerned about here is of what type prevChar
-           is. If inputChar is not part of a word, then the Java
-           conversions will (we hope) simply return inputChar.
-        */
-        switch (isWordChar(prevChar)) {
-        case IS_WORD_CHAR_TRUE:
-            return false;
-        case IS_WORD_CHAR_FALSE:
-            return true;
-        /* "MAYBE" implies that additional context is needed. An example is a
-         * single-quote, either straight or closing, which might be interpreted
-         * as a possessive or a contraction, or might be a closing quote.
+         * is. If inputChar is not part of a word, then the Java
+         * conversions will (we hope) simply return inputChar.
          */
-        case IS_WORD_CHAR_MAYBE:
-            char prevPrevChar = getRelativeCharInBlock(i, -2);
-            switch (isWordChar(prevPrevChar)) {
+        switch (isWordChar(prevChar)) {
             case IS_WORD_CHAR_TRUE:
                 return false;
             case IS_WORD_CHAR_FALSE:
                 return true;
+            /* "MAYBE" implies that additional context is needed. An example is a
+             * single-quote, either straight or closing, which might be interpreted
+             * as a possessive or a contraction, or might be a closing quote.
+             */
             case IS_WORD_CHAR_MAYBE:
-                return true;
+                char prevPrevChar = getRelativeCharInBlock(i, -2);
+                switch (isWordChar(prevPrevChar)) {
+                case IS_WORD_CHAR_TRUE:
+                    return false;
+                case IS_WORD_CHAR_FALSE:
+                    return true;
+                case IS_WORD_CHAR_MAYBE:
+                    return true;
+                default:
+                    return false;
+            }
             default:
                 return false;
         }
-        default:
-            return false;
-        }
     }
 
     /**
@@ -304,7 +337,7 @@ public class FOText extends FONode {
      * block as one unit, allowing text in adjoining FOText objects to be
      * returned if the parameters are outside of the current object.
      *
-     * @param i index into ca[]
+     * @param i index into the CharBuffer
      * @param offset signed integer with relative position within the
      *   block of the character to return. To return the character immediately
      *   preceding i, pass -1. To return the character immediately after i,
@@ -313,30 +346,34 @@ public class FOText extends FONode {
      * the offset points to an area outside of the block.
      */
     private char getRelativeCharInBlock(int i, int offset) {
+
+        int charIndex = i + offset;
         // The easy case is where the desired character is in the same FOText
-        if (((i + offset) >= 0) && ((i + offset) <= this.endIndex)) {
-            return ca[i + offset];
+        if (charIndex >= 0 && charIndex < this.length()) {
+            return this.charAt(i + offset);
         }
+
         // For now, we can't look at following FOText nodes
         if (offset > 0) {
-             return '\u0000';
-         }
+             return CharUtilities.NULL_CHAR;
+        }
+
         // Remaining case has the text in some previous FOText node
         boolean foundChar = false;
-        char charToReturn = '\u0000';
+        char charToReturn = CharUtilities.NULL_CHAR;
         FOText nodeToTest = this;
         int remainingOffset = offset + i;
         while (!foundChar) {
             if (nodeToTest.prevFOTextThisBlock == null) {
-                foundChar = true;
                 break;
             }
             nodeToTest = nodeToTest.prevFOTextThisBlock;
-            if ((nodeToTest.endIndex + remainingOffset) >= 0) {
-                charToReturn = nodeToTest.ca[nodeToTest.endIndex + remainingOffset];
+            int diff = nodeToTest.length() + remainingOffset - 1;
+            if (diff >= 0) {
+                charToReturn = nodeToTest.charAt(diff);
                 foundChar = true;
             } else {
-                remainingOffset = remainingOffset + nodeToTest.endIndex;
+                remainingOffset += diff;
             }
         }
         return charToReturn;
@@ -367,39 +404,6 @@ public class FOText extends FONode {
     }
 
     /**
-     * Transforms one character in ca[] using the text-transform property.
-     *
-     * @param i the index into ca[]
-     * @return char with transformed value
-     */
-    private char charTransform(int i) {
-        switch (textTransform) {
-        /* put NONE first, as this is probably the common case */
-        case Constants.EN_NONE:
-            return ca[i];
-        case Constants.EN_UPPERCASE:
-            return Character.toUpperCase(ca[i]);
-        case Constants.EN_LOWERCASE:
-            return Character.toLowerCase(ca[i]);
-        case Constants.EN_CAPITALIZE:
-            if (isStartOfWord(i)) {
-                /*
-                 Use toTitleCase here. Apparently, some languages use
-                 a different character to represent a letter when using
-                 initial caps than when all of the letters in the word
-                 are capitalized. We will try to let Java handle this.
-                */
-                return Character.toTitleCase(ca[i]);
-            } else {
-                return ca[i];
-            }
-        default:
-            assert false; //should never happen as the property subsystem catches that case
-            return ca[i];
-        }
-    }
-
-    /**
      * Determines whether the input char should be considered part of a
      * "word". This is used primarily to determine whether the character
      * immediately following starts a new word, but may have other uses.
@@ -484,57 +488,64 @@ public class FOText extends FONode {
     }
 
     private class TextCharIterator extends CharIterator {
-        private int curIndex = 0;
-
-        /* Current space removal process:  just increment the startIndex
-           to "remove" leading spaces from ca, until an unremoved character
-           is found.  Then perform arraycopy's to remove extra spaces
-           between words.  nextCharCalled is used to determine if an 
-           unremoved character has already been found--if its value > 2
-           than it means that has occurred (it is reset to zero each time we 
-           remove a space via incrementing the startIndex.)  */
-        private int nextCharCalled = 0;
-        
+
+        int currentPosition = 0;
+
+        boolean canRemove = false;
+        boolean canReplace = false;
+
+        /** {@inheritDoc} */
         public boolean hasNext() {
-           if (curIndex == 0) {
-//             log.debug("->" + new String(ca) + "<-");
-          }
-           return (curIndex < endIndex);
+           return (this.currentPosition < charBuffer.limit());
         }
 
+        /** {@inheritDoc} */
         public char nextChar() {
-            if (curIndex < endIndex) {
-                nextCharCalled++;
-                // Just a char class? Don't actually care about the value!
-                return ca[curIndex++];
+
+            if (this.currentPosition < charBuffer.limit()) {
+                this.canRemove = true;
+                this.canReplace = true;
+                return charBuffer.get(currentPosition++);
             } else {
                 throw new NoSuchElementException();
             }
+
         }
 
+        /** {@inheritDoc} */
         public void remove() {
-            if (curIndex < endIndex && nextCharCalled < 2) {
-                startIndex++;
-                nextCharCalled = 0;
-//              log.debug("removeA: " + new String(ca, startIndex, endIndex - startIndex));
-            } else if (curIndex < endIndex) {
-                // copy from curIndex to end to curIndex-1
-                System.arraycopy(ca, curIndex, ca, curIndex - 1, 
-                    endIndex - curIndex);
-                endIndex--;
-                curIndex--;
-//              log.debug("removeB: " + new String(ca, startIndex, endIndex - startIndex));
-            } else if (curIndex == endIndex) {
-//              log.debug("removeC: " + new String(ca, startIndex, endIndex - startIndex));
-                endIndex--;
-                curIndex--;
+
+            if (this.canRemove) {
+                charBuffer.position(currentPosition);
+                // Slice the buffer at the current position
+                CharBuffer tmp = charBuffer.slice();
+                // Reset position to before current character
+                charBuffer.position(--currentPosition);
+                if (tmp.hasRemaining()) {
+                    // Transfer any remaining characters
+                    charBuffer.mark();
+                    charBuffer.put(tmp);
+                    charBuffer.reset();
+                }
+                // Decrease limit
+                charBuffer.limit(charBuffer.limit() - 1);
+                // Make sure following calls fail, unless nextChar() was called
+                this.canRemove = false;
+            } else {
+                throw new IllegalStateException();
             }
+
         }
 
+        /** {@inheritDoc} */
         public void replaceChar(char c) {
-            if (curIndex > 0 && curIndex <= endIndex) {
-                ca[curIndex - 1] = c;
+
+            if (this.canReplace) {
+                charBuffer.put(currentPosition - 1, c);
+            } else {
+                throw new IllegalStateException();
             }
+
         }
 
     }
@@ -560,7 +571,7 @@ public class FOText extends FONode {
         return color;
     }
 
-    /** 
+    /**
      * @return the "keep-together" property.
      */
     public KeepProperty getKeepTogether() {
@@ -571,40 +582,40 @@ public class FOText extends FONode {
      * @return the "letter-spacing" property.
      */
     public Property getLetterSpacing() {
-        return letterSpacing; 
+        return letterSpacing;
     }
-    
+
     /**
      * @return the "line-height" property.
      */
     public SpaceProperty getLineHeight() {
         return lineHeight;
     }
-    
+
     /**
      * @return the "white-space-treatment" property
      */
     public int getWhitespaceTreatment() {
         return whiteSpaceTreatment;
     }
-    
+
     /**
      * @return the "word-spacing" property.
      */
     public Property getWordSpacing() {
-        return wordSpacing; 
+        return wordSpacing;
     }
-    
+
     /**
      * @return the "wrap-option" property.
      */
     public int getWrapOption() {
-        return wrapOption; 
+        return wrapOption;
     }
-    
+
     /** @return the "text-decoration" property. */
     public CommonTextDecoration getTextDecoration() {
-        return textDecoration; 
+        return textDecoration;
     }
 
     /** @return the baseline-shift property */
@@ -614,14 +625,12 @@ public class FOText extends FONode {
 
     /** {@inheritDoc} */
     public String toString() {
-        StringBuffer sb = new StringBuffer(super.toString());
-        sb.append(" (").append(ca).append(")");
-        return sb.toString();
+        return (this.charBuffer == null) ? "" : this.charBuffer.toString();
     }
- 
+
     /** {@inheritDoc} */
     public String getLocalName() {
-        return null;
+        return "#PCDATA";
     }
 
     /** {@inheritDoc} */
@@ -631,10 +640,34 @@ public class FOText extends FONode {
 
     /** {@inheritDoc} */
     protected String gatherContextInfo() {
-        if (getLocator() != null) {
+        if (this.locator != null) {
             return super.gatherContextInfo();
         } else {
-            return new String(ca).trim();
+            return this.toString();
+        }
+    }
+
+    /** {@inheritDoc} */
+    public char charAt(int position) {
+        return this.charBuffer.get(position);
+    }
+
+    /** {@inheritDoc} */
+    public CharSequence subSequence(int start, int end) {
+        return this.charBuffer.subSequence(start, end);
+    }
+
+    /** {@inheritDoc} */
+    public int length() {
+        return this.charBuffer.limit();
+    }
+
+    /**
+     * Resets the backing <code>java.nio.CharBuffer</code>
+     */
+    public void resetBuffer() {
+        if (this.charBuffer != null) {
+            this.charBuffer.rewind();
         }
-    }    
-}
-\ No newline at end of file
+    }
+}