diff options
Diffstat (limited to 'src/java/org/apache/fop/fo/FOText.java')
-rw-r--r-- | src/java/org/apache/fop/fo/FOText.java | 445 |
1 files changed, 239 insertions, 206 deletions
diff --git a/src/java/org/apache/fop/fo/FOText.java b/src/java/org/apache/fop/fo/FOText.java index 8c85bb039..217997ace 100644 --- a/src/java/org/apache/fop/fo/FOText.java +++ b/src/java/org/apache/fop/fo/FOText.java @@ -20,6 +20,7 @@ package org.apache.fop.fo; import java.awt.Color; +import java.nio.CharBuffer; import java.util.NoSuchElementException; import org.xml.sax.Locator; @@ -33,48 +34,17 @@ import org.apache.fop.fo.properties.CommonTextDecoration; import org.apache.fop.fo.properties.KeepProperty; import org.apache.fop.fo.properties.Property; import org.apache.fop.fo.properties.SpaceProperty; +import org.apache.fop.util.CharUtilities; /** * A text node (PCDATA) in the formatting object tree. - * - * Unfortunately the BufferManager implementatation holds - * onto references to the character data in this object - * longer than the lifetime of the object itself, causing - * excessive memory consumption and OOM errors. */ -public class FOText extends FONode { +public class FOText extends FONode implements CharSequence { - /** - * the character array containing the text - */ - public char[] ca; - - /** - * The starting valid index of the ca array - * to be processed. - * - * This value is originally equal to 0, but becomes - * incremented during leading whitespace removal by the flow.Block class, - * via the TextCharIterator.remove() method below. - */ - public int startIndex = 0; + /** the <code>CharBuffer</code> containing the text */ + private CharBuffer charBuffer; - /** - * The ending valid index of the ca array - * to be processed. - * - * This value is originally equal to ca.length, but becomes - * decremented during between-word whitespace removal by the - * XMLWhiteSpaceHandler via the TextCharIterator.remove() - * method below. - */ - public int endIndex = 0; - - /** properties relevant for PCDATA */ - /* TODO: these are basically always the same as the parent FObj or FObjMixed - * so maybe those can be removed, and the accessors could - * dispatch the call to the parent? - */ + /** properties relevant for #PCDATA */ private CommonFont commonFont; private CommonHyphenation commonHyphenation; private Color color; @@ -105,10 +75,10 @@ public class FOText extends FONode { * which FOText nodes are descendants of the same block. */ private Block ancestorBlock = null; - + /** Holds the text decoration values. May be null */ private CommonTextDecoration textDecoration; - + private static final int IS_WORD_CHAR_FALSE = 0; private static final int IS_WORD_CHAR_TRUE = 1; private static final int IS_WORD_CHAR_MAYBE = 2; @@ -123,37 +93,61 @@ public class FOText extends FONode { } /** {@inheritDoc} */ - protected void addCharacters(char[] data, int start, int end, + protected void addCharacters(char[] data, int start, int length, PropertyList list, Locator locator) throws FOPException { - int length = end - start; - int calength = 0; - char[] nca; - if (ca != null) { - calength = ca.length; - nca = new char[calength + length]; - System.arraycopy(ca, 0, nca, 0, calength); + if (this.charBuffer == null) { + // buffer not yet initialized, do so now + this.charBuffer = CharBuffer.allocate(length); } else { - nca = new char[length]; + // allocate a larger buffer, and transfer contents + int newLength = this.charBuffer.limit() + length; + CharBuffer newBuffer = CharBuffer.allocate(newLength); + this.charBuffer.rewind(); + newBuffer.put(this.charBuffer); + this.charBuffer = newBuffer; } - System.arraycopy(data, start, nca, calength, length); - endIndex = nca.length; - this.ca = nca; - } + // append characters + this.charBuffer.put(data, start, length); + + } /** - * {@inheritDoc} + * Return the array of characters for this instance. + * + * @return a char array containing the text */ + public char[] getCharArray() { + + if (this.charBuffer == null) { + return null; + } + + if (this.charBuffer.hasArray()) { + return this.charBuffer.array(); + } + + // only if the buffer implementation has + // no accessible backing array, return a new one + char[] ca = new char[this.charBuffer.limit()]; + this.charBuffer.rewind(); + this.charBuffer.get(ca); + return ca; + + } + + /** {@inheritDoc} */ public FONode clone(FONode parent, boolean removeChildren) throws FOPException { FOText ft = (FOText) super.clone(parent, removeChildren); if (removeChildren) { - //not really removing, but just make sure the char array - //pointed to is really a different one, and reset any - //possible whitespace-handling effects - if (ca != null) { - ft.ca = new char[ca.length]; - System.arraycopy(ca, 0, ft.ca, 0, ca.length); + // not really removing, just make sure the char buffer + // pointed to is really a different one + if (this.charBuffer != null) { + ft.charBuffer = CharBuffer.allocate(this.charBuffer.limit()); + this.charBuffer.rewind(); + ft.charBuffer.put(this.charBuffer); + ft.charBuffer.rewind(); } } ft.prevFOTextThisBlock = null; @@ -162,29 +156,33 @@ public class FOText extends FONode { return ft; } - /** - * {@inheritDoc} - */ + /** {@inheritDoc} */ public void bind(PropertyList pList) throws FOPException { - commonFont = pList.getFontProps(); - commonHyphenation = pList.getHyphenationProps(); - color = pList.get(Constants.PR_COLOR).getColor(getUserAgent()); - keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep(); - lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace(); - letterSpacing = pList.get(Constants.PR_LETTER_SPACING); - whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum(); - whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum(); - textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum(); - wordSpacing = pList.get(Constants.PR_WORD_SPACING); - wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum(); - textDecoration = pList.getTextDecorationProps(); - baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength(); + this.commonFont = pList.getFontProps(); + this.commonHyphenation = pList.getHyphenationProps(); + this.color = pList.get(Constants.PR_COLOR).getColor(getUserAgent()); + this.keepTogether = pList.get(Constants.PR_KEEP_TOGETHER).getKeep(); + this.lineHeight = pList.get(Constants.PR_LINE_HEIGHT).getSpace(); + this.letterSpacing = pList.get(Constants.PR_LETTER_SPACING); + this.whiteSpaceCollapse = pList.get(Constants.PR_WHITE_SPACE_COLLAPSE).getEnum(); + this.whiteSpaceTreatment = pList.get(Constants.PR_WHITE_SPACE_TREATMENT).getEnum(); + this.textTransform = pList.get(Constants.PR_TEXT_TRANSFORM).getEnum(); + this.wordSpacing = pList.get(Constants.PR_WORD_SPACING); + this.wrapOption = pList.get(Constants.PR_WRAP_OPTION).getEnum(); + this.textDecoration = pList.getTextDecorationProps(); + this.baselineShift = pList.get(Constants.PR_BASELINE_SHIFT).getLength(); } /** {@inheritDoc} */ protected void endOfNode() throws FOPException { + super.endOfNode(); + getFOEventHandler().characters( + this.getCharArray(), 0, this.charBuffer.limit()); + } + + /** {@inheritDoc} */ + public void finalizeNode() { textTransform(); - getFOEventHandler().characters(ca, startIndex, endIndex); } /** @@ -198,16 +196,20 @@ public class FOText extends FONode { */ public boolean willCreateArea() { if (whiteSpaceCollapse == Constants.EN_FALSE - && endIndex - startIndex > 0) { + && this.charBuffer.limit() > 0) { return true; } - for (int i = startIndex; i < endIndex; i++) { - char ch = ca[i]; - if (!((ch == ' ') - || (ch == '\n') - || (ch == '\r') - || (ch == '\t'))) { // whitespace + char ch; + this.charBuffer.rewind(); + while (this.charBuffer.hasRemaining()) { + ch = this.charBuffer.get(); + if (!((ch == CharUtilities.SPACE) + || (ch == CharUtilities.LINEFEED_CHAR) + || (ch == CharUtilities.CARRIAGE_RETURN) + || (ch == CharUtilities.TAB))) { + // not whitespace + this.charBuffer.rewind(); return true; } } @@ -222,7 +224,7 @@ public class FOText extends FONode { } /** - * This method is run as part of the ancestor Block's flushText(), to + * This method is run as part of the ancestor Block's flushText(), to * create xref pointers to the previous FOText objects within the same Block * @param ancestorBlock the ancestor fo:block */ @@ -230,7 +232,7 @@ public class FOText extends FONode { this.ancestorBlock = ancestorBlock; // if the last FOText is a sibling, point to it, and have it point here if (ancestorBlock.lastFOTextProcessed != null) { - if (ancestorBlock.lastFOTextProcessed.ancestorBlock + if (ancestorBlock.lastFOTextProcessed.ancestorBlock == this.ancestorBlock) { prevFOTextThisBlock = ancestorBlock.lastFOTextProcessed; prevFOTextThisBlock.nextFOTextThisBlock = this; @@ -241,16 +243,47 @@ public class FOText extends FONode { } /** - * This method is run as part of the Constructor, to handle the - * text-transform property. + * This method is run as part of endOfNode(), to handle the + * text-transform property for accumulated FOText */ private void textTransform() { - if (getFOEventHandler().inMarker() + if (getBuilderContext().inMarker() || textTransform == Constants.EN_NONE) { return; } - for (int i = 0; i < endIndex; i++) { - ca[i] = charTransform(i); + + this.charBuffer.rewind(); + CharBuffer tmp = this.charBuffer.slice(); + char c; + int lim = this.charBuffer.limit(); + int pos = -1; + while (++pos < lim) { + c = this.charBuffer.get(); + switch (textTransform) { + case Constants.EN_UPPERCASE: + tmp.put(Character.toUpperCase(c)); + break; + case Constants.EN_LOWERCASE: + tmp.put(Character.toLowerCase(c)); + break; + case Constants.EN_CAPITALIZE: + if (isStartOfWord(pos)) { + /* + Use toTitleCase here. Apparently, some languages use + a different character to represent a letter when using + initial caps than when all of the letters in the word + are capitalized. We will try to let Java handle this. + */ + tmp.put(Character.toTitleCase(c)); + } else { + tmp.put(c); + } + break; + default: + //should never happen as the property subsystem catches that case + assert false; + //nop + } } } @@ -261,7 +294,7 @@ public class FOText extends FONode { * well, such as word-spacing. The definition of "word" is somewhat ambiguous * and appears to be definable by the user agent. * - * @param i index into ca[] + * @param i index into charBuffer * * @return True if the character at this location is the start of a new * word. @@ -269,33 +302,33 @@ public class FOText extends FONode { private boolean isStartOfWord(int i) { char prevChar = getRelativeCharInBlock(i, -1); /* All we are really concerned about here is of what type prevChar - is. If inputChar is not part of a word, then the Java - conversions will (we hope) simply return inputChar. - */ - switch (isWordChar(prevChar)) { - case IS_WORD_CHAR_TRUE: - return false; - case IS_WORD_CHAR_FALSE: - return true; - /* "MAYBE" implies that additional context is needed. An example is a - * single-quote, either straight or closing, which might be interpreted - * as a possessive or a contraction, or might be a closing quote. + * is. If inputChar is not part of a word, then the Java + * conversions will (we hope) simply return inputChar. */ - case IS_WORD_CHAR_MAYBE: - char prevPrevChar = getRelativeCharInBlock(i, -2); - switch (isWordChar(prevPrevChar)) { + switch (isWordChar(prevChar)) { case IS_WORD_CHAR_TRUE: return false; case IS_WORD_CHAR_FALSE: return true; + /* "MAYBE" implies that additional context is needed. An example is a + * single-quote, either straight or closing, which might be interpreted + * as a possessive or a contraction, or might be a closing quote. + */ case IS_WORD_CHAR_MAYBE: - return true; + char prevPrevChar = getRelativeCharInBlock(i, -2); + switch (isWordChar(prevPrevChar)) { + case IS_WORD_CHAR_TRUE: + return false; + case IS_WORD_CHAR_FALSE: + return true; + case IS_WORD_CHAR_MAYBE: + return true; + default: + return false; + } default: return false; } - default: - return false; - } } /** @@ -304,7 +337,7 @@ public class FOText extends FONode { * block as one unit, allowing text in adjoining FOText objects to be * returned if the parameters are outside of the current object. * - * @param i index into ca[] + * @param i index into the CharBuffer * @param offset signed integer with relative position within the * block of the character to return. To return the character immediately * preceding i, pass -1. To return the character immediately after i, @@ -313,30 +346,34 @@ public class FOText extends FONode { * the offset points to an area outside of the block. */ private char getRelativeCharInBlock(int i, int offset) { + + int charIndex = i + offset; // The easy case is where the desired character is in the same FOText - if (((i + offset) >= 0) && ((i + offset) <= this.endIndex)) { - return ca[i + offset]; + if (charIndex >= 0 && charIndex < this.length()) { + return this.charAt(i + offset); } + // For now, we can't look at following FOText nodes if (offset > 0) { - return '\u0000'; - } + return CharUtilities.NULL_CHAR; + } + // Remaining case has the text in some previous FOText node boolean foundChar = false; - char charToReturn = '\u0000'; + char charToReturn = CharUtilities.NULL_CHAR; FOText nodeToTest = this; int remainingOffset = offset + i; while (!foundChar) { if (nodeToTest.prevFOTextThisBlock == null) { - foundChar = true; break; } nodeToTest = nodeToTest.prevFOTextThisBlock; - if ((nodeToTest.endIndex + remainingOffset) >= 0) { - charToReturn = nodeToTest.ca[nodeToTest.endIndex + remainingOffset]; + int diff = nodeToTest.length() + remainingOffset - 1; + if (diff >= 0) { + charToReturn = nodeToTest.charAt(diff); foundChar = true; } else { - remainingOffset = remainingOffset + nodeToTest.endIndex; + remainingOffset += diff; } } return charToReturn; @@ -367,39 +404,6 @@ public class FOText extends FONode { } /** - * Transforms one character in ca[] using the text-transform property. - * - * @param i the index into ca[] - * @return char with transformed value - */ - private char charTransform(int i) { - switch (textTransform) { - /* put NONE first, as this is probably the common case */ - case Constants.EN_NONE: - return ca[i]; - case Constants.EN_UPPERCASE: - return Character.toUpperCase(ca[i]); - case Constants.EN_LOWERCASE: - return Character.toLowerCase(ca[i]); - case Constants.EN_CAPITALIZE: - if (isStartOfWord(i)) { - /* - Use toTitleCase here. Apparently, some languages use - a different character to represent a letter when using - initial caps than when all of the letters in the word - are capitalized. We will try to let Java handle this. - */ - return Character.toTitleCase(ca[i]); - } else { - return ca[i]; - } - default: - assert false; //should never happen as the property subsystem catches that case - return ca[i]; - } - } - - /** * Determines whether the input char should be considered part of a * "word". This is used primarily to determine whether the character * immediately following starts a new word, but may have other uses. @@ -484,57 +488,64 @@ public class FOText extends FONode { } private class TextCharIterator extends CharIterator { - private int curIndex = 0; - - /* Current space removal process: just increment the startIndex - to "remove" leading spaces from ca, until an unremoved character - is found. Then perform arraycopy's to remove extra spaces - between words. nextCharCalled is used to determine if an - unremoved character has already been found--if its value > 2 - than it means that has occurred (it is reset to zero each time we - remove a space via incrementing the startIndex.) */ - private int nextCharCalled = 0; - + + int currentPosition = 0; + + boolean canRemove = false; + boolean canReplace = false; + + /** {@inheritDoc} */ public boolean hasNext() { - if (curIndex == 0) { -// log.debug("->" + new String(ca) + "<-"); - } - return (curIndex < endIndex); + return (this.currentPosition < charBuffer.limit()); } + /** {@inheritDoc} */ public char nextChar() { - if (curIndex < endIndex) { - nextCharCalled++; - // Just a char class? Don't actually care about the value! - return ca[curIndex++]; + + if (this.currentPosition < charBuffer.limit()) { + this.canRemove = true; + this.canReplace = true; + return charBuffer.get(currentPosition++); } else { throw new NoSuchElementException(); } + } + /** {@inheritDoc} */ public void remove() { - if (curIndex < endIndex && nextCharCalled < 2) { - startIndex++; - nextCharCalled = 0; -// log.debug("removeA: " + new String(ca, startIndex, endIndex - startIndex)); - } else if (curIndex < endIndex) { - // copy from curIndex to end to curIndex-1 - System.arraycopy(ca, curIndex, ca, curIndex - 1, - endIndex - curIndex); - endIndex--; - curIndex--; -// log.debug("removeB: " + new String(ca, startIndex, endIndex - startIndex)); - } else if (curIndex == endIndex) { -// log.debug("removeC: " + new String(ca, startIndex, endIndex - startIndex)); - endIndex--; - curIndex--; + + if (this.canRemove) { + charBuffer.position(currentPosition); + // Slice the buffer at the current position + CharBuffer tmp = charBuffer.slice(); + // Reset position to before current character + charBuffer.position(--currentPosition); + if (tmp.hasRemaining()) { + // Transfer any remaining characters + charBuffer.mark(); + charBuffer.put(tmp); + charBuffer.reset(); + } + // Decrease limit + charBuffer.limit(charBuffer.limit() - 1); + // Make sure following calls fail, unless nextChar() was called + this.canRemove = false; + } else { + throw new IllegalStateException(); } + } + /** {@inheritDoc} */ public void replaceChar(char c) { - if (curIndex > 0 && curIndex <= endIndex) { - ca[curIndex - 1] = c; + + if (this.canReplace) { + charBuffer.put(currentPosition - 1, c); + } else { + throw new IllegalStateException(); } + } } @@ -560,7 +571,7 @@ public class FOText extends FONode { return color; } - /** + /** * @return the "keep-together" property. */ public KeepProperty getKeepTogether() { @@ -571,40 +582,40 @@ public class FOText extends FONode { * @return the "letter-spacing" property. */ public Property getLetterSpacing() { - return letterSpacing; + return letterSpacing; } - + /** * @return the "line-height" property. */ public SpaceProperty getLineHeight() { return lineHeight; } - + /** * @return the "white-space-treatment" property */ public int getWhitespaceTreatment() { return whiteSpaceTreatment; } - + /** * @return the "word-spacing" property. */ public Property getWordSpacing() { - return wordSpacing; + return wordSpacing; } - + /** * @return the "wrap-option" property. */ public int getWrapOption() { - return wrapOption; + return wrapOption; } - + /** @return the "text-decoration" property. */ public CommonTextDecoration getTextDecoration() { - return textDecoration; + return textDecoration; } /** @return the baseline-shift property */ @@ -614,14 +625,12 @@ public class FOText extends FONode { /** {@inheritDoc} */ public String toString() { - StringBuffer sb = new StringBuffer(super.toString()); - sb.append(" (").append(ca).append(")"); - return sb.toString(); + return (this.charBuffer == null) ? "" : this.charBuffer.toString(); } - + /** {@inheritDoc} */ public String getLocalName() { - return null; + return "#PCDATA"; } /** {@inheritDoc} */ @@ -631,10 +640,34 @@ public class FOText extends FONode { /** {@inheritDoc} */ protected String gatherContextInfo() { - if (getLocator() != null) { + if (this.locator != null) { return super.gatherContextInfo(); } else { - return new String(ca).trim(); + return this.toString(); + } + } + + /** {@inheritDoc} */ + public char charAt(int position) { + return this.charBuffer.get(position); + } + + /** {@inheritDoc} */ + public CharSequence subSequence(int start, int end) { + return this.charBuffer.subSequence(start, end); + } + + /** {@inheritDoc} */ + public int length() { + return this.charBuffer.limit(); + } + + /** + * Resets the backing <code>java.nio.CharBuffer</code> + */ + public void resetBuffer() { + if (this.charBuffer != null) { + this.charBuffer.rewind(); } - } -}
\ No newline at end of file + } +} |