From 4a45db76cee354948589f04c075dc0bdc943c9ca Mon Sep 17 00:00:00 2001 From: Karen Lease Date: Wed, 21 Nov 2001 22:13:36 +0000 Subject: [PATCH] Remove extra whitespace during FO tree construction git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@194572 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/fop/fo/AbstractCharIterator.java | 39 +++++ src/org/apache/fop/fo/CharClass.java | 85 ++++++++++ src/org/apache/fop/fo/CharIterator.java | 19 +++ src/org/apache/fop/fo/FONode.java | 26 +++ src/org/apache/fop/fo/FOText.java | 49 ++++++ src/org/apache/fop/fo/FObj.java | 20 ++- src/org/apache/fop/fo/FObjMixed.java | 7 + src/org/apache/fop/fo/InlineCharIterator.java | 54 ++++++ src/org/apache/fop/fo/OneCharIterator.java | 35 ++++ .../apache/fop/fo/RecursiveCharIterator.java | 81 +++++++++ src/org/apache/fop/fo/flow/Block.java | 157 +++++++++++++++++- src/org/apache/fop/fo/flow/Character.java | 10 +- src/org/apache/fop/fo/flow/Inline.java | 5 + .../fop/fo/pagination/SimplePageMaster.java | 2 +- 14 files changed, 585 insertions(+), 4 deletions(-) create mode 100644 src/org/apache/fop/fo/AbstractCharIterator.java create mode 100644 src/org/apache/fop/fo/CharClass.java create mode 100644 src/org/apache/fop/fo/CharIterator.java create mode 100644 src/org/apache/fop/fo/InlineCharIterator.java create mode 100644 src/org/apache/fop/fo/OneCharIterator.java create mode 100644 src/org/apache/fop/fo/RecursiveCharIterator.java diff --git a/src/org/apache/fop/fo/AbstractCharIterator.java b/src/org/apache/fop/fo/AbstractCharIterator.java new file mode 100644 index 000000000..b040f413e --- /dev/null +++ b/src/org/apache/fop/fo/AbstractCharIterator.java @@ -0,0 +1,39 @@ +/* + * $Id$ + * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. + * For details on use and redistribution please refer to the + * LICENSE file included with these sources. 
+ */ + +package org.apache.fop.fo; + +// FOP +import org.apache.fop.apps.FOPException; +import java.util.NoSuchElementException; + +public abstract class AbstractCharIterator implements CharIterator, Cloneable { + + abstract public boolean hasNext(); + + abstract public char nextChar() throws NoSuchElementException ; + + public Object next() throws NoSuchElementException { + return new Character(nextChar()); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + + public void replaceChar(char c) { + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException ex) { + return null; + } + } +}; diff --git a/src/org/apache/fop/fo/CharClass.java b/src/org/apache/fop/fo/CharClass.java new file mode 100644 index 000000000..6723edb57 --- /dev/null +++ b/src/org/apache/fop/fo/CharClass.java @@ -0,0 +1,85 @@ +/* + * $Id$ + * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. + * For details on use and redistribution please refer to the + * LICENSE file included with these sources. + */ + +package org.apache.fop.fo; + + +/** + * A character classifier that distinguishes whitespace, LF and other text. + */ +public class CharClass { + + /** Character code used to signal a character boundary in + * inline content, such as an inline with borders and padding + * or a nested block object. + */ + public static final char CODE_EOT=0; + + public static final int UCWHITESPACE=0; // unicode white space + public static final int LINEFEED=1; + public static final int EOT=2; // Boundary between text runs + public static final int NONWHITESPACE=3; + public static final int XMLWHITESPACE=4; + + + /** + * Return the appropriate CharClass constant for the type + * of the passed character. 
+ */ + public static int classOf(char c) { + if (c == CODE_EOT) return EOT; + if (c == '\n') return LINEFEED; + if ( c==' '|| c == '\r' || c=='\t' ) return XMLWHITESPACE; + if (isAnySpace(c)) return UCWHITESPACE; + return NONWHITESPACE; + } + + + /** + * Helper method to determine if the character is a + * space with normal behaviour. Normal behaviour means that + * it's not non-breaking + */ + private static boolean isSpace(char c) { + return (c == ' ' || + (c >= '\u2000' && c <= '\u200B')); +// c == '\u2000' // en quad +// c == '\u2001' // em quad +// c == '\u2002' // en space +// c == '\u2003' // em space +// c == '\u2004' // three-per-em space +// c == '\u2005' // four--per-em space +// c == '\u2006' // six-per-em space +// c == '\u2007' // figure space +// c == '\u2008' // punctuation space +// c == '\u2009' // thin space +// c == '\u200A' // hair space +// c == '\u200B' // zero width space + + } + + + /** + * Method to determine if the character is a nonbreaking + * space. + */ + private static boolean isNBSP(char c) { + return (c == '\u00A0' || // normal no-break space + c == '\u202F' || // narrow no-break space + c == '\u3000' || // ideographic space + c == '\uFEFF') ; // zero width no-break space + } + + /** + * @return true if the character represents any kind of space + */ + private static boolean isAnySpace(char c) { + return (isSpace(c) || isNBSP(c)); + } + +} + diff --git a/src/org/apache/fop/fo/CharIterator.java b/src/org/apache/fop/fo/CharIterator.java new file mode 100644 index 000000000..a0657ed8a --- /dev/null +++ b/src/org/apache/fop/fo/CharIterator.java @@ -0,0 +1,19 @@ +/* + * $Id$ + * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. + * For details on use and redistribution please refer to the + * LICENSE file included with these sources. 
+ */ + +package org.apache.fop.fo; + +import java.util.Iterator; +import java.util.NoSuchElementException; + + +public interface CharIterator extends Iterator { + + char nextChar() throws NoSuchElementException ; + void replaceChar(char c); + Object clone(); +} diff --git a/src/org/apache/fop/fo/FONode.java b/src/org/apache/fop/fo/FONode.java index 911020b9e..9f2d5aae5 100644 --- a/src/org/apache/fop/fo/FONode.java +++ b/src/org/apache/fop/fo/FONode.java @@ -16,6 +16,9 @@ import org.apache.log.Logger; import org.xml.sax.Attributes; +import java.util.ListIterator; +import java.util.NoSuchElementException; + /** * base class for nodes in the XML tree * @@ -98,4 +101,27 @@ abstract public class FONode { return this.parent; } + /** + * Return an iterator over all the children of this FObj. + * @return A ListIterator. + */ + public ListIterator getChildren() { + return null; + } + + /** + * Return an iterator over the object's children starting + * at the passed node. + * @param childNode First node in the iterator + * @return A ListIterator or null if childNode isn't a child of + * this FObj. + */ + public ListIterator getChildren(FONode childNode) { + return null; + } + + public CharIterator charIterator() { + return new OneCharIterator(CharClass.CODE_EOT); + } + } diff --git a/src/org/apache/fop/fo/FOText.java b/src/org/apache/fop/fo/FOText.java index 84b9c667b..a3a2bb393 100644 --- a/src/org/apache/fop/fo/FOText.java +++ b/src/org/apache/fop/fo/FOText.java @@ -19,6 +19,8 @@ import org.apache.fop.system.BufferManager; import org.apache.fop.layoutmgr.LayoutManager; import org.apache.fop.layoutmgr.TextLayoutManager; +import java.util.NoSuchElementException; + /** * a text node in the formatting object tree * @@ -89,7 +91,54 @@ public class FOText extends FObj { } public LayoutManager getLayoutManager() { + // What if nothing left (length=0)? 
+ if (length < ca.length) { + char[] tmp = ca; + ca = new char[length]; + System.arraycopy(tmp, 0, ca, 0, length); + } return new TextLayoutManager(this, ca, textInfo); } + + public CharIterator charIterator() { + return new TextCharIterator(); + } + + private class TextCharIterator extends AbstractCharIterator { + int curIndex = 0; + public boolean hasNext() { + return (curIndex < length); + } + + public char nextChar() { + if (curIndex < length) { + // Just a char class? Don't actually care about the value! + return ca[curIndex++]; + } + else throw new NoSuchElementException(); + } + + public void remove() { + if (curIndex>0 && curIndex < length) { + // copy from curIndex to end to curIndex-1 + System.arraycopy(ca, curIndex, ca, curIndex-1, + length-curIndex); + length--; + curIndex--; + } + else if (curIndex == length) { + curIndex = --length; + } + } + + + public void replaceChar(char c) { + if (curIndex>0 && curIndex <= length) { + ca[curIndex-1]=c; + } + } + + + } } diff --git a/src/org/apache/fop/fo/FObj.java b/src/org/apache/fop/fo/FObj.java index 326f9674f..82f0882d6 100644 --- a/src/org/apache/fop/fo/FObj.java +++ b/src/org/apache/fop/fo/FObj.java @@ -204,11 +204,29 @@ public class FObj extends FONode { return null; } - + /** + * Return an iterator over all the children of this FObj. + * @return A ListIterator. + */ public ListIterator getChildren() { return children.listIterator(); } + /** + * Return an iterator over the object's children starting + * at the passed node. + * @param childNode First node in the iterator + * @return A ListIterator or null if childNode isn't a child of + * this FObj. 
+ */ + public ListIterator getChildren(FONode childNode) { + int i = children.indexOf(childNode); + if (i >= 0) { + return children.listIterator(i); + } + else return null; + } + public void setIsInTableCell() { this.isInTableCell = true; // made recursive by Eric Schaeffer diff --git a/src/org/apache/fop/fo/FObjMixed.java b/src/org/apache/fop/fo/FObjMixed.java index cb7b4b3ff..d525fb243 100644 --- a/src/org/apache/fop/fo/FObjMixed.java +++ b/src/org/apache/fop/fo/FObjMixed.java @@ -14,6 +14,7 @@ import org.apache.fop.apps.FOPException; import org.apache.fop.apps.StreamRenderer; import org.apache.fop.datatypes.ColorType; + /** * base class for representation of mixed content formatting objects * and their processing @@ -93,5 +94,11 @@ public class FObjMixed extends FObj { return new Status(Status.OK); } + public CharIterator charIterator() { + return new RecursiveCharIterator(this); + } + + + } diff --git a/src/org/apache/fop/fo/InlineCharIterator.java b/src/org/apache/fop/fo/InlineCharIterator.java new file mode 100644 index 000000000..f8d048814 --- /dev/null +++ b/src/org/apache/fop/fo/InlineCharIterator.java @@ -0,0 +1,54 @@ +package org.apache.fop.fo; + +import org.apache.fop.layout.BorderAndPadding; +import java.util.Iterator; +import java.util.ListIterator; +import java.util.NoSuchElementException; + + +public class InlineCharIterator extends RecursiveCharIterator { + private boolean bStartBoundary=false; + private boolean bEndBoundary=false; + + public InlineCharIterator(FObj fobj, BorderAndPadding bap) { + super(fobj); + checkBoundaries(bap); + } + + + private void checkBoundaries(BorderAndPadding bap) { + // TODO! use start and end in BAP!! 
+ bStartBoundary = (bap.getBorderLeftWidth(false)>0 || + bap.getPaddingLeft(false)>0); + bEndBoundary = (bap.getBorderRightWidth(false)>0 || + bap.getPaddingRight(false)>0); + } + + public boolean hasNext() { + if (bStartBoundary) return true; + return (super.hasNext() || bEndBoundary); + /* If super.hasNext() returns false, + * we return true if we are going to return a "boundary" signal + * else false. + */ + } + + public char nextChar() throws NoSuchElementException { + if (bStartBoundary) { + bStartBoundary=false; + return CharClass.CODE_EOT; + } + try { + return super.nextChar(); + } + catch (NoSuchElementException e) { + // Underlying has nothing more to return + // Check end boundary char + if (bEndBoundary) { + bEndBoundary=false; + return CharClass.CODE_EOT; + } + else throw e; + } + } +} diff --git a/src/org/apache/fop/fo/OneCharIterator.java b/src/org/apache/fop/fo/OneCharIterator.java new file mode 100644 index 000000000..2eb8a7a5b --- /dev/null +++ b/src/org/apache/fop/fo/OneCharIterator.java @@ -0,0 +1,35 @@ +/* + * $Id$ + * Copyright (C) 2001 The Apache Software Foundation. All rights reserved. + * For details on use and redistribution please refer to the + * LICENSE file included with these sources. 
+ */ + +package org.apache.fop.fo; + +import java.util.Iterator; +import java.util.NoSuchElementException; + + +public class OneCharIterator extends AbstractCharIterator { + + private boolean bFirst=true; + private char charCode; + + public OneCharIterator(char c) { + this.charCode = c; + } + + public boolean hasNext() { + return bFirst; + } + + public char nextChar() throws NoSuchElementException { + if (bFirst) { + bFirst=false; + return charCode; + } + else throw new NoSuchElementException(); + } + +} diff --git a/src/org/apache/fop/fo/RecursiveCharIterator.java b/src/org/apache/fop/fo/RecursiveCharIterator.java new file mode 100644 index 000000000..edd3d990b --- /dev/null +++ b/src/org/apache/fop/fo/RecursiveCharIterator.java @@ -0,0 +1,81 @@ +package org.apache.fop.fo; + +import java.util.Iterator; +import java.util.ListIterator; +import java.util.NoSuchElementException; + + +public class RecursiveCharIterator extends AbstractCharIterator { + Iterator childIter = null; // Child flow objects + CharIterator curCharIter = null; // Children's characters + private FONode fobj; + private FONode curChild; + + public RecursiveCharIterator(FObj fobj) { + // Set up first child iterator + this.fobj = fobj; + this.childIter = fobj.getChildren(); + getNextCharIter(); + } + + public RecursiveCharIterator(FObj fobj, FONode child) { + // Set up first child iterator + this.fobj = fobj; + this.childIter = fobj.getChildren(child); + getNextCharIter(); + } + + public CharIterator mark() { + return (CharIterator) this.clone(); + } + + public Object clone() { + RecursiveCharIterator ci = (RecursiveCharIterator)super.clone(); + ci.childIter = fobj.getChildren(ci.curChild); + ci.curCharIter = (CharIterator)curCharIter.clone(); + return ci; + } + + + public void replaceChar(char c) { + if (curCharIter != null) { + curCharIter.replaceChar(c); + } + } + + + private void getNextCharIter() { + if (childIter.hasNext()) { + this.curChild = (FONode)childIter.next(); + this.curCharIter = 
curChild.charIterator(); + } + else { + curChild = null; + curCharIter = null; + } + } + + public boolean hasNext() { + while (curCharIter != null) { + if (curCharIter.hasNext()==false) { + getNextCharIter(); + } + else return true; + } + return false; + } + + public char nextChar() throws NoSuchElementException { + if (curCharIter != null) { + return curCharIter.nextChar(); + } + else throw new NoSuchElementException(); + } + + + public void remove() { + if (curCharIter != null) { + curCharIter.remove(); + } + } +} diff --git a/src/org/apache/fop/fo/flow/Block.java b/src/org/apache/fop/fo/flow/Block.java index 3b43e16cb..ba1b22ec4 100644 --- a/src/org/apache/fop/fo/flow/Block.java +++ b/src/org/apache/fop/fo/flow/Block.java @@ -54,10 +54,19 @@ public class Block extends FObjMixed { String id; int span; + private int wsTreatment; //ENUMERATION + private int lfTreatment; //ENUMERATION + private boolean bWScollapse; //true if white-space-collapse=true // this may be helpful on other FOs too boolean anythingLaidOut = false; + /** + * Index of first inline-type FO seen in a sequence. + * Used during FO tree building to do white-space handling. 
+ */ + private FONode firstInlineChild = null; + public Block(FONode parent) { super(parent); } @@ -65,6 +74,10 @@ public class Block extends FObjMixed { public void handleAttrs(Attributes attlist) throws FOPException { super.handleAttrs(attlist); this.span = this.properties.get("span").getEnum(); + this.wsTreatment = this.properties.get("white-space-treatment").getEnum(); + this.bWScollapse = (this.properties.get("white-space-collapse").getEnum() == + Constants.TRUE); + this.lfTreatment = this.properties.get("linefeed-treatment").getEnum(); } public Status layout(Area area) throws FOPException { @@ -118,7 +131,7 @@ public class Block extends FObjMixed { // this.properties.get("line-height-shift-adjustment"); // this.properties.get("line-stacking-strategy"); // this.properties.get("orphans"); - // this.properties.get("space-treatment"); + // this.properties.get("white-space-treatment"); // this.properties.get("span"); // this.properties.get("text-align"); // this.properties.get("text-align-last"); @@ -360,4 +373,146 @@ public class Block extends FObjMixed { public boolean generatesInlineAreas() { return false; } + + + public void addChild(FONode child) { + // Handle whitespace based on values of properties + // Handle a sequence of inline-producing children in + // one pass + if (((FObj)child).generatesInlineAreas()) { + if (firstInlineChild == null) { + firstInlineChild = child; + } + // lastInlineChild = children.size(); + } + else { + // Handle whitespace in preceding inline areas if any + handleWhiteSpace(); + } + super.addChild(child); + } + + public void end() { + handleWhiteSpace(); + } + + private void handleWhiteSpace() { + log.debug("fo:block: handleWhiteSpace"); + if (firstInlineChild != null) { + boolean bInWS=false; + boolean bPrevWasLF=false; + RecursiveCharIterator charIter = + new RecursiveCharIterator(this, firstInlineChild); + LFchecker lfCheck = new LFchecker(charIter); + + while (charIter.hasNext()) { + switch 
(CharClass.classOf(charIter.nextChar())) { + case CharClass.XMLWHITESPACE: + /* Some kind of whitespace character, except linefeed. */ + boolean bIgnore=false; + + switch (wsTreatment) { + case Constants.IGNORE: + bIgnore=true; + break; + case Constants.IGNORE_IF_BEFORE_LINEFEED: + bIgnore = lfCheck.nextIsLF(); + break; + case Constants.IGNORE_IF_SURROUNDING_LINEFEED: + bIgnore = (bPrevWasLF || lfCheck.nextIsLF()); + break; + case Constants.IGNORE_IF_AFTER_LINEFEED: + bIgnore = bPrevWasLF; + break; + } + // Handle ignore + if (bIgnore) { + charIter.remove(); + } + else if (bWScollapse) { + if (bInWS || (lfTreatment == Constants.PRESERVE && + (bPrevWasLF || lfCheck.nextIsLF()))) { + charIter.remove(); + } + else { + bInWS = true; + } + } + break; + + case CharClass.LINEFEED: + /* A linefeed */ + lfCheck.reset(); + bPrevWasLF=true; // for following whitespace + + switch (lfTreatment) { + case Constants.IGNORE: + charIter.remove(); + break; + case Constants.TREAT_AS_SPACE: + if (bInWS) { + // only if bWScollapse=true + charIter.remove(); + } + else { + if (bWScollapse) bInWS=true; + charIter.replaceChar('\u0020'); + } + break; + case Constants.TREAT_AS_ZERO_WIDTH_SPACE: + charIter.replaceChar('\u200b'); + // Fall through: this isn't XML whitespace + case Constants.PRESERVE: + bInWS=false; + break; + } + break; + + case CharClass.EOT: + // A "boundary" object such as a non-character inline + // or nested block object was encountered. + // If any whitespace run is in progress, finish it. 
+ // FALL THROUGH + + case CharClass.UCWHITESPACE: // Non XML-whitespace + case CharClass.NONWHITESPACE: + /* Any other character */ + bInWS = bPrevWasLF=false; + lfCheck.reset(); + break; + } + } + firstInlineChild = null; + } + } + + private static class LFchecker { + private boolean bNextIsLF=false; + private RecursiveCharIterator charIter; + + LFchecker(RecursiveCharIterator charIter) { + this.charIter = charIter; + } + + boolean nextIsLF() { + if (bNextIsLF==false) { + CharIterator lfIter = charIter.mark(); + while (lfIter.hasNext()) { + char c = lfIter.nextChar(); + if (c == '\n') { + bNextIsLF=true; + break; + } + else if (CharClass.classOf(c)!=CharClass.XMLWHITESPACE) { + break; + } + } + } + return bNextIsLF; + } + + void reset() { + bNextIsLF=false; + } + } } diff --git a/src/org/apache/fop/fo/flow/Character.java b/src/org/apache/fop/fo/flow/Character.java index 46507bf9a..3bcdf934c 100644 --- a/src/org/apache/fop/fo/flow/Character.java +++ b/src/org/apache/fop/fo/flow/Character.java @@ -37,6 +37,8 @@ public class Character extends FObj { public final static int OK = 0; public final static int DOESNOT_FIT = 1; + private char characterValue; + public Character(FONode parent) { super(parent); this.name = "fo:character"; @@ -112,7 +114,7 @@ public class Character extends FObj { } // Character specific properties - char characterValue = this.properties.get("character").getCharacter(); + characterValue = this.properties.get("character").getCharacter(); // initialize id @@ -147,4 +149,10 @@ public class Character extends FObj { } + public CharIterator charIterator() { + return new OneCharIterator(characterValue); + // But what if the character is ignored due to white space handling? 
+ } + + } diff --git a/src/org/apache/fop/fo/flow/Inline.java b/src/org/apache/fop/fo/flow/Inline.java index e161b3f9a..c6f78e8d0 100644 --- a/src/org/apache/fop/fo/flow/Inline.java +++ b/src/org/apache/fop/fo/flow/Inline.java @@ -87,4 +87,9 @@ public class Inline extends FObjMixed { } } + + public CharIterator charIterator() { + return new InlineCharIterator(this, propMgr.getBorderAndPadding()); + } + } diff --git a/src/org/apache/fop/fo/pagination/SimplePageMaster.java b/src/org/apache/fop/fo/pagination/SimplePageMaster.java index 6c01d2e7f..d5b6d9f4e 100644 --- a/src/org/apache/fop/fo/pagination/SimplePageMaster.java +++ b/src/org/apache/fop/fo/pagination/SimplePageMaster.java @@ -112,7 +112,7 @@ public class SimplePageMaster extends FObj { new Rectangle(0,0, pageWidth,pageHeight))); - _regions = null; + // _regions = null; // PageSequence access SimplePageMaster.... children = null; properties = null; } -- 2.39.5