From 045a3a5bb5082f9fc4fb0565ded8d8146757a298 Mon Sep 17 00:00:00 2001 From: "Andreas L. Delmelle" Date: Fri, 30 Dec 2005 13:33:18 +0000 Subject: [PATCH] Revision of refinement white-space handling (cfr. Bugzilla 37639) git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@360083 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/fop/fo/FOEventHandler.java | 13 + src/java/org/apache/fop/fo/FObjMixed.java | 25 +- .../apache/fop/fo/XMLWhiteSpaceHandler.java | 246 ++++++++++++++++++ src/java/org/apache/fop/fo/flow/Block.java | 191 ++------------ .../org/apache/fop/fo/flow/Character.java | 19 +- src/java/org/apache/fop/fo/flow/Inline.java | 10 +- .../apache/fop/fo/flow/RetrieveMarker.java | 2 +- status.xml | 7 +- .../block_white-space-collapse_2.xml | 8 +- .../standard-testcases/leader_text-align.xml | 18 +- .../standard-testcases/leader_toc.xml | 12 +- 11 files changed, 338 insertions(+), 213 deletions(-) create mode 100644 src/java/org/apache/fop/fo/XMLWhiteSpaceHandler.java diff --git a/src/java/org/apache/fop/fo/FOEventHandler.java b/src/java/org/apache/fop/fo/FOEventHandler.java index 3dce3be46..c8b11afd3 100644 --- a/src/java/org/apache/fop/fo/FOEventHandler.java +++ b/src/java/org/apache/fop/fo/FOEventHandler.java @@ -82,6 +82,11 @@ public abstract class FOEventHandler { */ protected PropertyListMaker propertyListMaker; + /** + * The XMLWhitespaceHandler for this tree + */ + protected XMLWhiteSpaceHandler whiteSpaceHandler = new XMLWhiteSpaceHandler(); + /** * Main constructor * @param foUserAgent the apps.FOUserAgent instance for this process @@ -128,6 +133,14 @@ public abstract class FOEventHandler { public void setPropertyListMaker(PropertyListMaker propertyListMaker) { this.propertyListMaker = propertyListMaker; } + + /** + * Return the XMLWhiteSpaceHandler + * @return the whiteSpaceHandler + */ + public XMLWhiteSpaceHandler getXMLWhiteSpaceHandler() { + return whiteSpaceHandler; + } /** * This method is called to indicate the start of a new document run. diff --git a/src/java/org/apache/fop/fo/FObjMixed.java b/src/java/org/apache/fop/fo/FObjMixed.java index a665bbd0e..b52cb9d03 100644 --- a/src/java/org/apache/fop/fo/FObjMixed.java +++ b/src/java/org/apache/fop/fo/FObjMixed.java @@ -32,6 +32,9 @@ public abstract class FObjMixed extends FObj { /** Represents accumulated, pending FO text. See flushText(). */ protected FOText ft = null; + /** Used for white-space handling; start CharIterator at node ... */ + protected FONode currentTextNode; + /** * @param parent FONode that is the parent of this object */ @@ -54,6 +57,10 @@ public abstract class FObjMixed extends FObj { /** @see org.apache.fop.fo.FONode#endOfNode() */ protected void endOfNode() throws FOPException { flushText(); + if (getNameId() != FO_LEADER) { + getFOEventHandler().whiteSpaceHandler + .handleWhiteSpace(this, currentTextNode); + } super.endOfNode(); } @@ -72,11 +79,24 @@ public abstract class FObjMixed extends FObj { } } + /** + * @see org.apache.fop.fo.FONode#addChildNode(FONode) + */ protected void addChildNode(FONode child) throws FOPException { flushText(); + if (child instanceof FOText || child.getNameId() == FO_CHARACTER) { + if (currentTextNode == null) { + currentTextNode = child; + } + } else if (getNameId() != FO_LEADER) { + // handle white-space for all text up to here + getFOEventHandler().whiteSpaceHandler + .handleWhiteSpace(this, currentTextNode, child); + currentTextNode = null; + } super.addChildNode(child); } - + /** * @return iterator for this object */ @@ -84,5 +104,4 @@ public abstract class FObjMixed extends FObj { return new RecursiveCharIterator(this); } -} - +} \ No newline at end of file diff --git a/src/java/org/apache/fop/fo/XMLWhiteSpaceHandler.java b/src/java/org/apache/fop/fo/XMLWhiteSpaceHandler.java new file mode 100644 index 000000000..87384cf7a --- /dev/null +++ b/src/java/org/apache/fop/fo/XMLWhiteSpaceHandler.java @@ -0,0 +1,246 @@ +/* + * Copyright 2005 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.fo; + +import java.util.List; +import org.apache.fop.fo.flow.Block; +import org.apache.fop.fo.flow.Character; +import org.apache.fop.util.CharUtilities; + +/** + * Class encapsulating the functionality for white-space-handling + * during refinement stage. + * + */ +public class XMLWhiteSpaceHandler { + + // True if we are in a run of white space + private boolean inWhiteSpace = false; + // True if the last char was a linefeed + private boolean afterLinefeed = true; + + private Block currentBlock; + private FObj currentFO; + private int linefeedTreatment; + private int whiteSpaceTreatment; + private int whiteSpaceCollapse; + private FONode nextChild; + private boolean endOfBlock; + private boolean nextChildIsBlock; + private RecursiveCharIterator charIter; + + private List discardableFOCharacters; + + /** + * Marks a Character object as discardable, so that it is effectively + * removed from the FOTree at the end of handleWhitespace() + * @param foChar the Character object to be removed from the list of + * childNodes + */ + public void addDiscardableFOChar(Character foChar) { + if (discardableFOCharacters == null) { + discardableFOCharacters = new java.util.ArrayList(); + } + discardableFOCharacters.add(foChar); + } + + /** + * Handle white-space for the fo that is passed in, starting at + * firstTextNode + * @param fo the FO for which to handle white-space + * @param firstTextNode the node at which to start + */ + public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode) { + if (fo.getNameId() == Constants.FO_BLOCK) { + this.currentBlock = (Block) fo; + this.linefeedTreatment = currentBlock.getLinefeedTreatment(); + this.whiteSpaceCollapse = currentBlock.getWhitespaceCollapse(); + this.whiteSpaceTreatment = currentBlock.getWhitespaceTreatment(); + } + currentFO = fo; + if (firstTextNode == null) { + //nothing to do but initialize related properties + return; + } + charIter = new RecursiveCharIterator(fo, firstTextNode); + inWhiteSpace = false; + if (fo.getNameId() == Constants.FO_BLOCK) { + int textNodeIndex = -1; + if (fo.childNodes != null) { + textNodeIndex = fo.childNodes.indexOf(firstTextNode); + } + afterLinefeed = (textNodeIndex == 0 + || ((FONode) fo.childNodes.get(textNodeIndex - 1)) + .getNameId() == Constants.FO_BLOCK); + } + endOfBlock = (nextChild == null && currentFO == currentBlock); + nextChildIsBlock = (nextChild != null + && nextChild.getNameId() == Constants.FO_BLOCK); + handleWhiteSpace(); + } + + /** + * Handle white-space for the fo that is passed in, starting at + * firstTextNode (when a nested FO is encountered) + * @param fo the FO for which to handle white-space + * @param firstTextNode the node at which to start + * @param nextChild the child-node that will be added to the list after + * the last text-node + */ + public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode, FONode nextChild) { + this.nextChild = nextChild; + handleWhiteSpace(fo, firstTextNode); + this.nextChild = null; + } + + private void handleWhiteSpace() { + + EOLchecker lfCheck = new EOLchecker(charIter); + + while (charIter.hasNext()) { + char currentChar = charIter.nextChar(); + int currentCharClass = CharUtilities.classOf(currentChar); + if (currentCharClass == CharUtilities.LINEFEED + && linefeedTreatment == Constants.EN_TREAT_AS_SPACE) { + // if we have a linefeed and it is supposed to be treated + // like a space, that's what we do and continue + currentChar = '\u0020'; + charIter.replaceChar('\u0020'); + currentCharClass = CharUtilities.classOf(currentChar); + } + switch (CharUtilities.classOf(currentChar)) { + case CharUtilities.XMLWHITESPACE: + // Some kind of whitespace character, except linefeed. + if (inWhiteSpace && whiteSpaceCollapse == Constants.EN_TRUE) { + // We are in a run of whitespace and should collapse + // Just delete the char + charIter.remove(); + } else { + // Do the white space treatment here + boolean bIgnore = false; + + switch (whiteSpaceTreatment) { + case Constants.EN_IGNORE: + bIgnore = true; + break; + case Constants.EN_IGNORE_IF_BEFORE_LINEFEED: + bIgnore = lfCheck.beforeLinefeed(); + break; + case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED: + bIgnore = afterLinefeed + || lfCheck.beforeLinefeed(); + break; + case Constants.EN_IGNORE_IF_AFTER_LINEFEED: + bIgnore = afterLinefeed; + break; + case Constants.EN_PRESERVE: + // nothing to do now, replacement takes place later + break; + default: + //nop + } + // Handle ignore and replacement + if (bIgnore) { + charIter.remove(); + } else { + // this is to retain a single space between words + inWhiteSpace = true; + if (currentChar != '\u0020') { + charIter.replaceChar('\u0020'); + } + } + } + break; + + case CharUtilities.LINEFEED: + // A linefeed + switch (linefeedTreatment) { + case Constants.EN_IGNORE: + charIter.remove(); + break; + case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE: + charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE); + inWhiteSpace = false; + break; + case Constants.EN_PRESERVE: + lfCheck.reset(); + inWhiteSpace = false; + afterLinefeed = true; // for following whitespace + break; + default: + //nop + } + break; + + case CharUtilities.EOT: + // A "boundary" objects such as non-character inline + // or nested block object was encountered. + // If any whitespace run in progress, finish it. + // FALL THROUGH + + default: + // Any other character + inWhiteSpace = false; + afterLinefeed = false; + lfCheck.reset(); + break; + } + } + if (discardableFOCharacters != null + && !discardableFOCharacters.isEmpty()) { + currentFO.childNodes.removeAll(discardableFOCharacters); + discardableFOCharacters.clear(); + } + } + + private class EOLchecker { + private boolean nextIsEOL = false; + private RecursiveCharIterator charIter; + + EOLchecker(RecursiveCharIterator charIter) { + this.charIter = charIter; + } + + boolean beforeLinefeed() { + if (!nextIsEOL) { + CharIterator lfIter = charIter.mark(); + while (lfIter.hasNext()) { + int charClass = CharUtilities.classOf(lfIter.nextChar()); + if (charClass == CharUtilities.LINEFEED) { + if (linefeedTreatment == Constants.EN_PRESERVE) { + nextIsEOL = true; + return nextIsEOL; + } + } else if (charClass != CharUtilities.XMLWHITESPACE) { + return nextIsEOL; + } + } + // No more characters == end of text run + // means EOL if there either is a nested block to be added, + // or if this is the last text node in the current block + nextIsEOL = nextChildIsBlock || endOfBlock; + } + return nextIsEOL; + } + + void reset() { + nextIsEOL = false; + } + } +} diff --git a/src/java/org/apache/fop/fo/flow/Block.java b/src/java/org/apache/fop/fo/flow/Block.java index bfe7ec008..790087cb1 100644 --- a/src/java/org/apache/fop/fo/flow/Block.java +++ b/src/java/org/apache/fop/fo/flow/Block.java @@ -25,14 +25,10 @@ import org.apache.fop.datatypes.ColorType; import org.apache.fop.datatypes.Length; import org.apache.fop.datatypes.Numeric; import org.apache.fop.fo.CharIterator; -import org.apache.fop.fo.Constants; import org.apache.fop.fo.FONode; -import org.apache.fop.fo.FOText; import org.apache.fop.fo.FObjMixed; import org.apache.fop.fo.NullCharIterator; import org.apache.fop.fo.PropertyList; -import org.apache.fop.fo.PropertySets; -import org.apache.fop.fo.RecursiveCharIterator; import org.apache.fop.fo.ValidationException; import org.apache.fop.fo.properties.CommonAccessibility; import org.apache.fop.fo.properties.CommonAural; @@ -43,7 +39,6 @@ import org.apache.fop.fo.properties.CommonMarginBlock; import org.apache.fop.fo.properties.CommonRelativePosition; import org.apache.fop.fo.properties.KeepProperty; import org.apache.fop.fo.properties.SpaceProperty; -import org.apache.fop.util.CharUtilities; /* Modified by Mark Lillywhite mark-fop@inomial.com. The changes @@ -108,12 +103,6 @@ public class Block extends FObjMixed { // this may be helpful on other FOs too private boolean anythingLaidOut = false; - /** - * Index of first inline-type FO seen in a sequence. - * Used during FO tree building to do white-space handling. - */ - private FONode firstInlineChild = null; - /** * @param parent FONode that is the parent of this object * @@ -176,7 +165,6 @@ public class Block extends FObjMixed { */ protected void endOfNode() throws FOPException { super.endOfNode(); - handleWhiteSpace(); getFOEventHandler().endBlock(this); } @@ -348,173 +336,32 @@ public class Block extends FObjMixed { } /** - * @see org.apache.fop.fo.FONode#addChildNode(FONode) + * Accessor for the linefeed-treatment property + * + * @return the enum value of linefeed-treatment */ - public void addChildNode(FONode child) throws FOPException { - flushText(); - // Handle whitespace based on values of properties - // Handle a sequence of inline-producing child nodes in - // one pass - if (child instanceof FOText - || PropertySets.generatesInlineAreas(child.getNameId())) { - if (firstInlineChild == null) { - firstInlineChild = child; - } - // lastInlineChild = childNodes.size(); - } else { - // Handle whitespace in preceeding inline areas if any - handleWhiteSpace(); - } - super.addChildNode(child); + public int getLinefeedTreatment() { + return linefeedTreatment; } /** - * @see org.apache.fop.fo.FObj#notifyChildRemoval(org.apache.fop.fo.FONode) + * Accessor for the white-space-treatment property + * + * @return the enum value of white-space-treatment */ - protected void notifyChildRemoval(FONode node) { - if (node != null && node == firstInlineChild) { - firstInlineChild = null; - } - } - - private void handleWhiteSpace() { - //getLogger().debug("fo:block: handleWhiteSpace"); - if (firstInlineChild == null) { - return; // Nothing to do - } - - boolean inWS = false; // True if we are in a run of white space - /* - * True if the last non white space char seen was a linefeed. - * We start from the beginning of a line so it defaults to True. - */ - boolean prevWasLF = true; - - RecursiveCharIterator charIter = - new RecursiveCharIterator(this, firstInlineChild); - EOLchecker lfCheck = new EOLchecker(charIter); - - while (charIter.hasNext()) { - char currentChar = charIter.nextChar(); - int currentCharClass = CharUtilities.classOf(currentChar); - if (currentCharClass == CharUtilities.LINEFEED - && linefeedTreatment == EN_TREAT_AS_SPACE) { - // if we have a linefeed and it is suppose to be treated - // like a space, that's what we do and continue - currentChar = ' '; - charIter.replaceChar(' '); - currentCharClass = CharUtilities.classOf(currentChar); - } - switch (CharUtilities.classOf(currentChar)) { - case CharUtilities.XMLWHITESPACE: - /* Some kind of whitespace character, except linefeed. */ - if (inWS && whiteSpaceCollapse == EN_TRUE) { - // We are in a run of whitespace and should collapse - // Just delete the char - charIter.remove(); - } else { - // Do the white space treatment here - boolean bIgnore = false; - - switch (whiteSpaceTreatment) { - case Constants.EN_IGNORE: - bIgnore = true; - break; - case Constants.EN_IGNORE_IF_BEFORE_LINEFEED: - bIgnore = linefeedTreatment == Constants.EN_PRESERVE - && lfCheck.nextIsLF(); - break; - case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED: - bIgnore = (prevWasLF - || (linefeedTreatment == Constants.EN_PRESERVE - && lfCheck.nextIsLF())); - break; - case Constants.EN_IGNORE_IF_AFTER_LINEFEED: - bIgnore = prevWasLF; - break; - case Constants.EN_PRESERVE: - // nothing to do now, replacement takes place later - break; - } - // Handle ignore and replacement - if (bIgnore) { - charIter.remove(); - } else { - // this is to retain a single space between words - inWS = true; - if (currentChar != '\u0020') { - charIter.replaceChar('\u0020'); - } - } - } - break; - - case CharUtilities.LINEFEED: - /* A linefeed */ - switch (linefeedTreatment) { - case Constants.EN_IGNORE: - charIter.remove(); - break; - case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE: - charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE); - inWS = false; - break; - case Constants.EN_PRESERVE: - lfCheck.reset(); - inWS = false; - prevWasLF = true; // for following whitespace - break; - } - break; - - case CharUtilities.EOT: - // A "boundary" objects such as non-character inline - // or nested block object was encountered. - // If any whitespace run in progress, finish it. - // FALL THROUGH - - default: - /* Any other character */ - inWS = prevWasLF = false; - lfCheck.reset(); - break; - } - } - firstInlineChild = null; + public int getWhitespaceTreatment() { + return whiteSpaceTreatment; } - - private static class EOLchecker { - private boolean nextIsEOL = false; - private RecursiveCharIterator charIter; - - EOLchecker(RecursiveCharIterator charIter) { - this.charIter = charIter; - } - - boolean nextIsLF() { - if (nextIsEOL == false) { - CharIterator lfIter = charIter.mark(); - while (lfIter.hasNext()) { - int charClass = CharUtilities.classOf(lfIter.nextChar()); - if (charClass == CharUtilities.LINEFEED) { - nextIsEOL = true; - return nextIsEOL; - } else if (charClass != CharUtilities.XMLWHITESPACE) { - return nextIsEOL; - } - } - // No more characters == end of block == end of line - nextIsEOL = true; - return nextIsEOL; - } - return nextIsEOL; - } - - void reset() { - nextIsEOL = false; - } + + /** + * Accessor for the white-space-collapse property + * + * @return the enum value of white-space-collapse + */ + public int getWhitespaceCollapse() { + return whiteSpaceCollapse; } - + /** @see org.apache.fop.fo.FONode#charIterator() */ public CharIterator charIterator() { return NullCharIterator.getInstance(); diff --git a/src/java/org/apache/fop/fo/flow/Character.java b/src/java/org/apache/fop/fo/flow/Character.java index 5fb72d1e6..492b97d82 100644 --- a/src/java/org/apache/fop/fo/flow/Character.java +++ b/src/java/org/apache/fop/fo/flow/Character.java @@ -158,7 +158,7 @@ public class Character extends FObj { * @see org.apache.fop.fo.FObj#charIterator */ public CharIterator charIterator() { - return new TextCharIterator(); + return new FOCharIterator(this); } /** @@ -269,9 +269,14 @@ public class Character extends FObj { return FO_CHARACTER; } - private class TextCharIterator extends CharIterator { + private class FOCharIterator extends CharIterator { - private boolean bFirst = character != CharUtilities.CODE_EOT; + private boolean bFirst = true; + private Character foChar; + + FOCharIterator(Character foChar) { + this.foChar = foChar; + } public boolean hasNext() { return bFirst; @@ -280,18 +285,20 @@ public class Character extends FObj { public char nextChar() { if (bFirst) { bFirst = false; - return character; + return foChar.character; } else { throw new NoSuchElementException(); } } public void remove() { - character = CharUtilities.CODE_EOT; + foChar.character = CharUtilities.CODE_EOT; + getFOEventHandler().getXMLWhiteSpaceHandler() + .addDiscardableFOChar(foChar); } public void replaceChar(char c) { - character = c; + foChar.character = c; } } diff --git a/src/java/org/apache/fop/fo/flow/Inline.java b/src/java/org/apache/fop/fo/flow/Inline.java index 934cdcca2..40959f07e 100644 --- a/src/java/org/apache/fop/fo/flow/Inline.java +++ b/src/java/org/apache/fop/fo/flow/Inline.java @@ -24,7 +24,7 @@ import org.apache.fop.apps.FOPException; import org.apache.fop.datatypes.Length; import org.apache.fop.fo.CharIterator; import org.apache.fop.fo.FONode; -import org.apache.fop.fo.InlineCharIterator; +import org.apache.fop.fo.OneCharIterator; import org.apache.fop.fo.PropertyList; import org.apache.fop.fo.ValidationException; import org.apache.fop.fo.properties.CommonRelativePosition; @@ -108,6 +108,7 @@ public class Inline extends InlineLevel { } checkId(id); + getFOEventHandler().startInline(this); } @@ -182,13 +183,6 @@ public class Inline extends InlineLevel { return dominantBaseline; } - /** - * @see org.apache.fop.fo.FObjMixed#charIterator - */ - public CharIterator charIterator() { - return new InlineCharIterator(this, commonBorderPaddingBackground); - } - /** @see org.apache.fop.fo.FONode#getLocalName() */ public String getLocalName() { return "inline"; diff --git a/src/java/org/apache/fop/fo/flow/RetrieveMarker.java b/src/java/org/apache/fop/fo/flow/RetrieveMarker.java index 04071ad3e..7aec11771 100644 --- a/src/java/org/apache/fop/fo/flow/RetrieveMarker.java +++ b/src/java/org/apache/fop/fo/flow/RetrieveMarker.java @@ -43,7 +43,7 @@ import org.apache.fop.fo.ValidationException; * This will create a layout manager that will retrieve * a marker based on the information. */ -public class RetrieveMarker extends FObjMixed { +public class RetrieveMarker extends FObj { // The value of properties relevant for fo:retrieve-marker. private String retrieveClassName; private int retrievePosition; diff --git a/status.xml b/status.xml index 09ae37386..1e5252b34 100644 --- a/status.xml +++ b/status.xml @@ -27,10 +27,13 @@ - + + Revision of refinement white-space handling. + + Added feature: support for white-space shorthand - + Added feature: support for page-break-* shorthands diff --git a/test/layoutengine/standard-testcases/block_white-space-collapse_2.xml b/test/layoutengine/standard-testcases/block_white-space-collapse_2.xml index 4f72ba7b9..cf5f4cc0d 100644 --- a/test/layoutengine/standard-testcases/block_white-space-collapse_2.xml +++ b/test/layoutengine/standard-testcases/block_white-space-collapse_2.xml @@ -41,7 +41,7 @@ word and linefeeds - everywhere + everywhere @@ -102,10 +102,6 @@ - - - - - + diff --git a/test/layoutengine/standard-testcases/leader_text-align.xml b/test/layoutengine/standard-testcases/leader_text-align.xml index 086090f28..70c91bb9a 100644 --- a/test/layoutengine/standard-testcases/leader_text-align.xml +++ b/test/layoutengine/standard-testcases/leader_text-align.xml @@ -112,27 +112,27 @@ - + - + - + - + - + - + @@ -140,15 +140,15 @@ - + - + - + diff --git a/test/layoutengine/standard-testcases/leader_toc.xml b/test/layoutengine/standard-testcases/leader_toc.xml index c8b85ce1b..1ac421de9 100644 --- a/test/layoutengine/standard-testcases/leader_toc.xml +++ b/test/layoutengine/standard-testcases/leader_toc.xml @@ -122,32 +122,32 @@ - + - + - + - + - + - + -- 2.39.5