]> source.dussan.org Git - xmlgraphics-fop.git/commitdiff
Remove extra whitespace during FO tree construction
authorKaren Lease <klease@apache.org>
Wed, 21 Nov 2001 22:13:36 +0000 (22:13 +0000)
committerKaren Lease <klease@apache.org>
Wed, 21 Nov 2001 22:13:36 +0000 (22:13 +0000)
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@194572 13f79535-47bb-0310-9956-ffa450edef68

14 files changed:
src/org/apache/fop/fo/AbstractCharIterator.java [new file with mode: 0644]
src/org/apache/fop/fo/CharClass.java [new file with mode: 0644]
src/org/apache/fop/fo/CharIterator.java [new file with mode: 0644]
src/org/apache/fop/fo/FONode.java
src/org/apache/fop/fo/FOText.java
src/org/apache/fop/fo/FObj.java
src/org/apache/fop/fo/FObjMixed.java
src/org/apache/fop/fo/InlineCharIterator.java [new file with mode: 0644]
src/org/apache/fop/fo/OneCharIterator.java [new file with mode: 0644]
src/org/apache/fop/fo/RecursiveCharIterator.java [new file with mode: 0644]
src/org/apache/fop/fo/flow/Block.java
src/org/apache/fop/fo/flow/Character.java
src/org/apache/fop/fo/flow/Inline.java
src/org/apache/fop/fo/pagination/SimplePageMaster.java

diff --git a/src/org/apache/fop/fo/AbstractCharIterator.java b/src/org/apache/fop/fo/AbstractCharIterator.java
new file mode 100644 (file)
index 0000000..b040f41
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * $Id$
+ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
+ * For details on use and redistribution please refer to the
+ * LICENSE file included with these sources.
+ */
+
+package org.apache.fop.fo;
+
+// FOP
+import org.apache.fop.apps.FOPException;
+import java.util.NoSuchElementException;
+    
+/**
+ * Skeleton implementation of CharIterator.
+ * Subclasses supply hasNext() and nextChar(); this class maps the
+ * java.util.Iterator view onto them and provides defaults for the
+ * optional operations.
+ */
+public abstract class AbstractCharIterator implements CharIterator, Cloneable {
+
+    /**
+     * @return true if nextChar() can return at least one more character
+     */
+    public abstract boolean hasNext();
+
+    /**
+     * @return the next character of the iteration
+     * @throws NoSuchElementException if there are no more characters
+     */
+    public abstract char nextChar() throws NoSuchElementException;
+
+    /**
+     * Iterator view of nextChar().
+     * @return the next character wrapped in a java.lang.Character
+     * @throws NoSuchElementException if there are no more characters
+     */
+    public Object next() throws NoSuchElementException {
+        return new Character(nextChar());
+    }
+
+    /**
+     * Removal is not supported unless a subclass overrides this method.
+     * @throws UnsupportedOperationException always
+     */
+    public void remove() {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Default implementation: the request is silently ignored.
+     * Subclasses backed by modifiable text override this.
+     * @param c replacement character (unused here)
+     */
+    public void replaceChar(char c) {
+    }
+
+    /**
+     * @return a shallow copy of this iterator
+     */
+    public Object clone() {
+        try {
+            return super.clone();
+        } catch (CloneNotSupportedException ex) {
+            // Cannot happen because this class implements Cloneable.
+            // Fail loudly instead of returning null, which would only
+            // show up later as a NullPointerException in the caller.
+            throw new InternalError(ex.toString());
+        }
+    }
+}
diff --git a/src/org/apache/fop/fo/CharClass.java b/src/org/apache/fop/fo/CharClass.java
new file mode 100644 (file)
index 0000000..6723edb
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * $Id$
+ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
+ * For details on use and redistribution please refer to the
+ * LICENSE file included with these sources.
+ */
+
+package org.apache.fop.fo;
+
+
+/**
+ * A character class allowing to distinguish whitespace, LF, other text.
+ */
+public class CharClass {
+
+    /**
+     * Character code used to signal a character boundary in
+     * inline content, such as an inline with borders and padding
+     * or a nested block object.
+     */
+    public static final char CODE_EOT = 0;
+
+    /** Unicode white space that is not XML white space. */
+    public static final int UCWHITESPACE = 0;
+    /** The linefeed character. */
+    public static final int LINEFEED = 1;
+    /** Boundary between text runs. */
+    public static final int EOT = 2;
+    /** Any character that is not white space. */
+    public static final int NONWHITESPACE = 3;
+    /** Space, carriage return or tab. */
+    public static final int XMLWHITESPACE = 4;
+
+    /**
+     * Classify a character.
+     * @param c the character to classify
+     * @return one of the class constants defined above
+     */
+    public static int classOf(char c) {
+        switch (c) {
+        case CODE_EOT:
+            return EOT;
+        case '\n':
+            return LINEFEED;
+        case ' ':
+        case '\r':
+        case '\t':
+            return XMLWHITESPACE;
+        default:
+            return isAnySpace(c) ? UCWHITESPACE : NONWHITESPACE;
+        }
+    }
+
+    /**
+     * Spaces with normal (breaking) behaviour: the plain space and
+     * the range U+2000 (en quad) through U+200B (zero width space),
+     * i.e. en/em quad, en/em space, three-, four- and six-per-em
+     * space, figure, punctuation, thin, hair and zero width space.
+     */
+    private static boolean isBreakingSpace(char c) {
+        if (c == ' ') {
+            return true;
+        }
+        return c >= '\u2000' && c <= '\u200B';
+    }
+
+    /**
+     * Characters treated as non-breaking spaces.
+     */
+    private static boolean isNonBreakingSpace(char c) {
+        switch (c) {
+        case '\u00A0':   // normal no-break space
+        case '\u202F':   // narrow no-break space
+        case '\u3000':   // ideographic space
+        case '\uFEFF':   // zero width no-break space
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    /**
+     * @return true if the character represents any kind of space
+     */
+    private static boolean isAnySpace(char c) {
+        return isBreakingSpace(c) || isNonBreakingSpace(c);
+    }
+
+}
+
diff --git a/src/org/apache/fop/fo/CharIterator.java b/src/org/apache/fop/fo/CharIterator.java
new file mode 100644 (file)
index 0000000..a0657ed
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * $Id$
+ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
+ * For details on use and redistribution please refer to the
+ * LICENSE file included with these sources.
+ */
+
+package org.apache.fop.fo;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+
+/**
+ * An Iterator that hands out primitive characters and allows the
+ * character most recently returned to be replaced.
+ */
+public interface CharIterator extends Iterator {
+
+    /**
+     * @return the next character of the iteration
+     * @throws NoSuchElementException if there are no more characters
+     */
+    char nextChar() throws NoSuchElementException;
+
+    /**
+     * Replace the character last returned by nextChar().
+     * Implementations that cannot modify their text may ignore this.
+     * @param c the replacement character
+     */
+    void replaceChar(char c);
+
+    /**
+     * @return a copy of this iterator
+     */
+    Object clone();
+}
index 911020b9e2e2e96c4864937fa9fde236f093630f..9f2d5aae5484ce065d7b256bc7874ee3627d0212 100644 (file)
@@ -16,6 +16,9 @@ import org.apache.log.Logger;
 
 import org.xml.sax.Attributes;
 
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
 /**
  * base class for nodes in the XML tree
  *
@@ -98,4 +101,27 @@ abstract public class FONode {
         return this.parent;
     }
 
+    /**
+     * Return an iterator over all the children of this node.
+     * This base implementation keeps no child list and returns null;
+     * FObj overrides it to iterate over its children.
+     * @return A ListIterator, or null in this base implementation.
+     */
+    public ListIterator getChildren() {
+       return null;
+    }
+
+    /**
+     * Return an iterator over the object's children starting
+     * at the passed node.
+     * This base implementation keeps no child list and always
+     * returns null; FObj overrides it.
+     * @param childNode First node in the iterator
+     * @return A ListIterator or null if childNode isn't a child of
+     * this node (always null in this base implementation).
+     */
+    public ListIterator getChildren(FONode childNode) {
+       return null;
+    }
+
+    /**
+     * Return an iterator over the characters of this node.
+     * The default yields exactly one CharClass.CODE_EOT character,
+     * i.e. the node is reported as a boundary between text runs.
+     * @return A CharIterator producing a single CODE_EOT.
+     */
+    public CharIterator charIterator() {
+       return new OneCharIterator(CharClass.CODE_EOT);
+    }
+
 }
index 84b9c667b4c61e32794c37803ed9145286015f7e..a3a2bb393c833379870051600fc8afe0e5f1814c 100644 (file)
@@ -19,6 +19,8 @@ import org.apache.fop.system.BufferManager;
 import org.apache.fop.layoutmgr.LayoutManager;
 import org.apache.fop.layoutmgr.TextLayoutManager;
 
+import java.util.NoSuchElementException;
+
 /**
  * a text node in the formatting object tree
  *
@@ -89,7 +91,54 @@ public class FOText extends FObj {
     }
 
     public LayoutManager getLayoutManager() {
+       // Characters may have been removed through the char iterator,
+       // which only decrements length; trim the backing array so it
+       // holds exactly the characters still in use.
+       // What if nothing left (length=0)?
+       if (length < ca.length) {
+           char[] tmp = ca;
+           ca  = new char[length];
+           System.arraycopy(tmp, 0, ca, 0, length);
+       }
        return new TextLayoutManager(this, ca, textInfo);
     }
+
+    /**
+     * Return an iterator over the characters of this text node.
+     * @return A new TextCharIterator positioned at the first character.
+     */
+    public CharIterator charIterator() {
+       return new TextCharIterator();
+    }
+
+    /**
+     * Iterator over the first length characters of the enclosing
+     * text node's array ca. Supports removing or replacing the
+     * character most recently returned by nextChar().
+     */
+    private class TextCharIterator extends AbstractCharIterator {
+        /** Index in ca of the next character to return. */
+        private int curIndex = 0;
+
+        public boolean hasNext() {
+            return (curIndex < length);
+        }
+
+        public char nextChar() {
+            if (curIndex >= length) {
+                throw new NoSuchElementException();
+            }
+            return ca[curIndex++];
+        }
+
+        /**
+         * Remove the character last returned by nextChar() by shifting
+         * the following characters down one position.
+         * Does nothing if no character has been returned yet; in
+         * particular it never drives length or curIndex below zero.
+         */
+        public void remove() {
+            if (curIndex <= 0 || curIndex > length) {
+                return;
+            }
+            // Copy from curIndex to end down to curIndex-1. When the
+            // removed character was the last one, nothing is copied.
+            System.arraycopy(ca, curIndex, ca, curIndex - 1,
+                             length - curIndex);
+            length--;
+            curIndex--;
+        }
+
+        /**
+         * Overwrite the character last returned by nextChar().
+         * Does nothing if no character has been returned yet.
+         */
+        public void replaceChar(char c) {
+            if (curIndex > 0 && curIndex <= length) {
+                ca[curIndex - 1] = c;
+            }
+        }
+
+    }
 }
 
index 326f9674f0af21436193f391a32cbc4ff4f25bd3..82f0882d69bef551d7e2b7790d84cf3c44a0a7fd 100644 (file)
@@ -204,11 +204,29 @@ public class FObj extends FONode {
        return null;
     }
 
-    
+    /**
+     * Return an iterator over all the children of this FObj,
+     * starting at the first child.
+     * @return A ListIterator over the child list.
+     */
     public ListIterator getChildren() {
        return children.listIterator();
     }
 
+    /**
+     * Return an iterator over this object's children whose first
+     * element is the passed node.
+     * @param childNode Node at which the iteration starts
+     * @return A ListIterator positioned at childNode, or null if
+     * childNode is not a child of this FObj.
+     */
+    public ListIterator getChildren(FONode childNode) {
+        final int pos = children.indexOf(childNode);
+        return (pos < 0) ? null : children.listIterator(pos);
+    }
+
     public void setIsInTableCell() {
         this.isInTableCell = true;
         // made recursive by Eric Schaeffer
index cb7b4b3fff39b2dac151894d232538a0df10003d..d525fb243037cd1fde03d3dfcba52d876f54bf36 100644 (file)
@@ -14,6 +14,7 @@ import org.apache.fop.apps.FOPException;
 import org.apache.fop.apps.StreamRenderer;
 import org.apache.fop.datatypes.ColorType;
 
+
 /**
  * base class for representation of mixed content formatting objects
  * and their processing
@@ -93,5 +94,11 @@ public class FObjMixed extends FObj {
         return new Status(Status.OK);
     }
 
+    /**
+     * Return an iterator over the characters of all children of
+     * this mixed-content object.
+     * @return A new RecursiveCharIterator over this object.
+     */
+    public CharIterator charIterator() {
+       return new RecursiveCharIterator(this);
+    }
+
+
+
 }
 
diff --git a/src/org/apache/fop/fo/InlineCharIterator.java b/src/org/apache/fop/fo/InlineCharIterator.java
new file mode 100644 (file)
index 0000000..f8d0488
--- /dev/null
@@ -0,0 +1,54 @@
+package org.apache.fop.fo;
+
+import org.apache.fop.layout.BorderAndPadding;
+import java.util.Iterator;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+
+/**
+ * Character iterator for an inline object. In addition to the
+ * characters of the children it reports a CharClass.CODE_EOT
+ * boundary before and/or after them when the inline has a border
+ * or padding on the corresponding side.
+ */
+public class InlineCharIterator extends RecursiveCharIterator {
+    /** True while the leading boundary still has to be reported. */
+    private boolean bStartBoundary = false;
+    /** True while the trailing boundary still has to be reported. */
+    private boolean bEndBoundary = false;
+
+    /**
+     * @param fobj the inline object whose children are iterated
+     * @param bap border and padding of that object, used to decide
+     * whether boundary characters are generated
+     */
+    public InlineCharIterator(FObj fobj, BorderAndPadding bap) {
+        super(fobj);
+        checkBoundaries(bap);
+    }
+
+    private void checkBoundaries(BorderAndPadding bap) {
+        // TODO! use start and end in BAP!!
+        bStartBoundary = (bap.getBorderLeftWidth(false) > 0
+                          || bap.getPaddingLeft(false) > 0);
+        bEndBoundary = (bap.getBorderRightWidth(false) > 0
+                        || bap.getPaddingRight(false) > 0);
+    }
+
+    /**
+     * @return true if a boundary or a child character is still pending
+     */
+    public boolean hasNext() {
+        if (bStartBoundary) {
+            return true;
+        }
+        // Once the children are exhausted, the only thing left to
+        // return is the trailing boundary signal, if any.
+        return (super.hasNext() || bEndBoundary);
+    }
+
+    /**
+     * @return the leading boundary, then the children's characters,
+     * then the trailing boundary
+     * @throws NoSuchElementException if nothing is left to return
+     */
+    public char nextChar() throws NoSuchElementException {
+        if (bStartBoundary) {
+            bStartBoundary = false;
+            return CharClass.CODE_EOT;
+        }
+        // Ask the underlying iterator explicitly instead of catching
+        // its exception: super.hasNext() also steps to the next child
+        // when the current one is used up, so the end boundary is not
+        // reported while children are still pending.
+        if (super.hasNext()) {
+            return super.nextChar();
+        }
+        if (bEndBoundary) {
+            bEndBoundary = false;
+            return CharClass.CODE_EOT;
+        }
+        throw new NoSuchElementException();
+    }
+}
diff --git a/src/org/apache/fop/fo/OneCharIterator.java b/src/org/apache/fop/fo/OneCharIterator.java
new file mode 100644 (file)
index 0000000..2eb8a7a
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * $Id$
+ * Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
+ * For details on use and redistribution please refer to the
+ * LICENSE file included with these sources.
+ */
+
+package org.apache.fop.fo;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+    
+/**
+ * A character iterator that returns one fixed character exactly once.
+ */
+public class OneCharIterator extends AbstractCharIterator {
+
+    /** True until the single character has been handed out. */
+    private boolean pending = true;
+    /** The character to hand out. */
+    private char value;
+
+    /**
+     * @param c the single character this iterator returns
+     */
+    public OneCharIterator(char c) {
+        value = c;
+    }
+
+    public boolean hasNext() {
+        return pending;
+    }
+
+    /**
+     * @return the character given to the constructor
+     * @throws NoSuchElementException on every call after the first
+     */
+    public char nextChar() throws NoSuchElementException {
+        if (!pending) {
+            throw new NoSuchElementException();
+        }
+        pending = false;
+        return value;
+    }
+
+}
diff --git a/src/org/apache/fop/fo/RecursiveCharIterator.java b/src/org/apache/fop/fo/RecursiveCharIterator.java
new file mode 100644 (file)
index 0000000..edd3d99
--- /dev/null
@@ -0,0 +1,81 @@
+package org.apache.fop.fo;
+
+import java.util.Iterator;
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+
+
+/**
+ * Iterates over the characters of all children of a formatting
+ * object by chaining the character iterators of the children.
+ */
+public class RecursiveCharIterator extends AbstractCharIterator {
+    Iterator childIter = null; // Child flow objects
+    CharIterator curCharIter = null; // Children's characters
+    private FONode fobj;
+    private FONode curChild;
+
+    /**
+     * Iterate over the characters of all children of fobj.
+     * @param fobj the parent object
+     */
+    public RecursiveCharIterator(FObj fobj) {
+        // Set up first child iterator
+        this.fobj = fobj;
+        this.childIter = fobj.getChildren();
+        getNextCharIter();
+    }
+
+    /**
+     * Iterate over the characters of the children of fobj, starting
+     * with the given child.
+     * @param fobj the parent object
+     * @param child first child to iterate over; if it is not a child
+     * of fobj the iterator is empty
+     */
+    public RecursiveCharIterator(FObj fobj, FONode child) {
+        // Set up first child iterator
+        this.fobj = fobj;
+        this.childIter = fobj.getChildren(child);
+        getNextCharIter();
+    }
+
+    /**
+     * @return an independent copy of this iterator at its current
+     * position, usable for look-ahead
+     */
+    public CharIterator mark() {
+        return (CharIterator) this.clone();
+    }
+
+    /**
+     * @return a copy that continues from the current position without
+     * advancing this iterator
+     */
+    public Object clone() {
+        RecursiveCharIterator ci = (RecursiveCharIterator) super.clone();
+        if (curChild != null && curCharIter != null) {
+            ListIterator rest = fobj.getChildren(curChild);
+            // getChildren(child) is positioned AT curChild. Skip it,
+            // otherwise the copy would hand out the current child a
+            // second time after finishing the cloned curCharIter.
+            if (rest != null && rest.hasNext()) {
+                rest.next();
+            }
+            ci.childIter = rest;
+            ci.curCharIter = (CharIterator) curCharIter.clone();
+        }
+        // else: this iterator is exhausted, so the copy (which shares
+        // the used-up child iterator) is exhausted as well.
+        return ci;
+    }
+
+    /**
+     * Replace the character last returned, delegating to the child
+     * iterator that is currently active.
+     */
+    public void replaceChar(char c) {
+        if (curCharIter != null) {
+            curCharIter.replaceChar(c);
+        }
+    }
+
+    /**
+     * Step to the next child and its character iterator, or mark the
+     * iteration as finished when there is no further child.
+     */
+    private void getNextCharIter() {
+        // childIter is null when the start child was not found
+        if (childIter != null && childIter.hasNext()) {
+            this.curChild = (FONode) childIter.next();
+            this.curCharIter = curChild.charIterator();
+        } else {
+            curChild = null;
+            curCharIter = null;
+        }
+    }
+
+    /**
+     * Skips over children that have no (more) characters.
+     * @return true if some remaining child still has a character
+     */
+    public boolean hasNext() {
+        while (curCharIter != null) {
+            if (curCharIter.hasNext()) {
+                return true;
+            }
+            getNextCharIter();
+        }
+        return false;
+    }
+
+    /**
+     * Returns the next character of the child that is currently
+     * active; call hasNext() first so that used-up children are
+     * skipped.
+     * @throws NoSuchElementException if no child is active or the
+     * active child has no more characters
+     */
+    public char nextChar() throws NoSuchElementException {
+        if (curCharIter == null) {
+            throw new NoSuchElementException();
+        }
+        return curCharIter.nextChar();
+    }
+
+    /**
+     * Remove the character last returned, delegating to the child
+     * iterator that is currently active.
+     */
+    public void remove() {
+        if (curCharIter != null) {
+            curCharIter.remove();
+        }
+    }
+}
index 3b43e16cb192c48dc8cfc810c15a64989147dc5c..ba1b22ec4dbefebc30caca1da65a3a2aff38b28e 100644 (file)
@@ -54,10 +54,19 @@ public class Block extends FObjMixed {
 
     String id;
     int span;
+    private int wsTreatment; //ENUMERATION
+    private int lfTreatment; //ENUMERATION
+    private boolean bWScollapse; //true if white-space-collapse=true
 
     // this may be helpful on other FOs too
     boolean anythingLaidOut = false;
 
+    /**
+     * First inline-type FO seen in the current sequence of inline children.
+     * Used during FO tree building to do white-space handling.
+     */
+    private FONode firstInlineChild = null;
+
     public Block(FONode parent) {
         super(parent);
     }
@@ -65,6 +74,10 @@ public class Block extends FObjMixed {
     public void handleAttrs(Attributes attlist) throws FOPException {
         super.handleAttrs(attlist);
         this.span = this.properties.get("span").getEnum();
+        // Cache the white-space related property values that
+        // handleWhiteSpace() consults for every character.
+        this.wsTreatment = this.properties.get("white-space-treatment").getEnum();
+        this.bWScollapse = (this.properties.get("white-space-collapse").getEnum() ==
+           Constants.TRUE);
+       this.lfTreatment = this.properties.get("linefeed-treatment").getEnum();
     }
 
     public Status layout(Area area) throws FOPException {
@@ -118,7 +131,7 @@ public class Block extends FObjMixed {
             // this.properties.get("line-height-shift-adjustment");
             // this.properties.get("line-stacking-strategy");
             // this.properties.get("orphans");
-            // this.properties.get("space-treatment");
+            // this.properties.get("white-space-treatment");
             // this.properties.get("span");
             // this.properties.get("text-align");
             // this.properties.get("text-align-last");
@@ -360,4 +373,146 @@ public class Block extends FObjMixed {
     public boolean generatesInlineAreas() {
         return false;
     }
+
+
+    /**
+     * Add a child and keep track of runs of inline-producing children
+     * so that white space can be handled for a whole run in one pass.
+     * The first inline child of a run is remembered; when a child that
+     * does not generate inline areas arrives, the white space of the
+     * preceding run (if any) is processed before the child is added.
+     * @param child the child node to add
+     */
+    public void addChild(FONode child) {
+        final boolean inlineChild = ((FObj) child).generatesInlineAreas();
+        if (!inlineChild) {
+            handleWhiteSpace();
+        } else if (firstInlineChild == null) {
+            firstInlineChild = child;
+        }
+        super.addChild(child);
+    }
+
+    /**
+     * Process the white space of a trailing run of inline children,
+     * if one is still pending.
+     * NOTE(review): presumably invoked once all children of the block
+     * have been added - confirm against the caller.
+     */
+    public void end() {
+       handleWhiteSpace();
+    }
+
+    /**
+     * Apply white-space-treatment, white-space-collapse and
+     * linefeed-treatment to the characters of the pending run of
+     * inline children, i.e. from firstInlineChild to the last child
+     * added so far. Characters are removed or replaced in place
+     * through the character iterator. Does nothing when no run is
+     * pending; afterwards the run is marked as handled.
+     */
+    private void handleWhiteSpace() {
+       log.debug("fo:block: handleWhiteSpace");
+       if (firstInlineChild != null) {
+           boolean bInWS=false; // a kept white space starts the current run (collapse only)
+           boolean bPrevWasLF=false; // a linefeed was seen and only white space since
+           RecursiveCharIterator charIter =
+               new RecursiveCharIterator(this, firstInlineChild);
+           LFchecker lfCheck = new LFchecker(charIter);
+
+           while (charIter.hasNext()) {
+               switch (CharClass.classOf(charIter.nextChar())) {
+               case CharClass.XMLWHITESPACE:
+                   /* Some kind of whitespace character, except linefeed. */
+                   boolean bIgnore=false;
+                   
+                   switch (wsTreatment) {
+                   case Constants.IGNORE:
+                       bIgnore=true;
+                       break;
+                   case Constants.IGNORE_IF_BEFORE_LINEFEED:
+                       bIgnore = lfCheck.nextIsLF();
+                       break;
+                   case Constants.IGNORE_IF_SURROUNDING_LINEFEED:
+                       bIgnore = (bPrevWasLF || lfCheck.nextIsLF());
+                       break;
+                   case Constants.IGNORE_IF_AFTER_LINEFEED:
+                       bIgnore = bPrevWasLF;
+                       break;
+                   }
+                   // Handle ignore
+                   if (bIgnore) {
+                       charIter.remove();
+                   }
+                   else if (bWScollapse) {
+                       // Drop all but the first white space of a run, and
+                       // any white space next to a preserved linefeed.
+                       if (bInWS || (lfTreatment == Constants.PRESERVE && 
+                                     (bPrevWasLF || lfCheck.nextIsLF()))) {
+                           charIter.remove();
+                       }
+                       else {
+                           bInWS = true;
+                       }
+                   }
+                   break;
+
+               case CharClass.LINEFEED:
+                   /* A linefeed */
+                   lfCheck.reset();
+                   bPrevWasLF=true; // for following whitespace
+
+                   switch (lfTreatment) {
+                   case Constants.IGNORE:
+                       charIter.remove();
+                       break;
+                   case Constants.TREAT_AS_SPACE:
+                       if (bInWS) {
+                           // only if bWScollapse=true
+                           charIter.remove();
+                       }
+                       else {
+                           if (bWScollapse) bInWS=true;
+                           charIter.replaceChar('\u0020');
+                       }
+                       break;
+                   case Constants.TREAT_AS_ZERO_WIDTH_SPACE:
+                       charIter.replaceChar('\u200b');
+                       // Fall through: this isn't XML whitespace
+                   case Constants.PRESERVE:
+                       bInWS=false;
+                       break;
+                   }
+                   break;
+
+               case CharClass.EOT:
+                   //   A "boundary" object such as a non-character inline
+                   // or nested block object was encountered.
+                   // If any whitespace run in progress, finish it.
+                   // FALL THROUGH
+
+               case CharClass.UCWHITESPACE: // Non XML-whitespace
+               case CharClass.NONWHITESPACE:
+                   /* Any other character */
+                   bInWS = bPrevWasLF=false;
+                   lfCheck.reset();
+                   break;
+               }
+           }
+           // The run has been handled; the next inline child starts a new one.
+           firstInlineChild = null;
+       }
+    }
+
+    /**
+     * Look-ahead helper that tells whether the text following the
+     * current position of a character iterator reaches a linefeed
+     * with nothing but XML white space in between. A positive answer
+     * is cached until reset() is called.
+     */
+    private static class LFchecker {
+        private boolean bNextIsLF = false;
+        private RecursiveCharIterator charIter;
+
+        LFchecker(RecursiveCharIterator charIter) {
+            this.charIter = charIter;
+        }
+
+        /**
+         * @return true if only XML white space separates the current
+         * position from the next linefeed
+         */
+        boolean nextIsLF() {
+            if (!bNextIsLF) {
+                // Scan ahead on a copy so the real iterator stays put.
+                CharIterator lookahead = charIter.mark();
+                boolean scanning = true;
+                while (scanning && lookahead.hasNext()) {
+                    char ch = lookahead.nextChar();
+                    if (ch == '\n') {
+                        bNextIsLF = true;
+                        scanning = false;
+                    } else if (CharClass.classOf(ch) != CharClass.XMLWHITESPACE) {
+                        scanning = false;
+                    }
+                }
+            }
+            return bNextIsLF;
+        }
+
+        /** Forget a cached positive answer. */
+        void reset() {
+            bNextIsLF = false;
+        }
+    }
 }
index 46507bf9a59e264aba6f370d0f1f92153fcc802a..3bcdf934c189f2ed9036c869f65bd324010fcc5d 100644 (file)
@@ -37,6 +37,8 @@ public class Character extends FObj {
     public final static int OK = 0;
     public final static int DOESNOT_FIT = 1;
 
+    private char characterValue;
+
     public Character(FONode parent) {
         super(parent);
         this.name = "fo:character";
@@ -112,7 +114,7 @@ public class Character extends FObj {
         }
 
         // Character specific properties
-        char characterValue = this.properties.get("character").getCharacter();
+        characterValue = this.properties.get("character").getCharacter();
 
 
         // initialize id
@@ -147,4 +149,10 @@ public class Character extends FObj {
 
     }
 
+    /**
+     * Return an iterator over the single character of this object.
+     * @return A OneCharIterator producing characterValue.
+     */
+    public CharIterator charIterator() {
+       return new OneCharIterator(characterValue);
+       // But what if the character is ignored due to white space handling?
+    }
+
+
 }
index e161b3f9aa6d3746a7f4c60ff25260b336745449..c6f78e8d0ae38dbde603ceba9c7a432dda044a6a 100644 (file)
@@ -87,4 +87,9 @@ public class Inline extends FObjMixed {
         }
     }
 
+
+    /**
+     * Return an iterator over the characters of this inline's children;
+     * the inline's border and padding are passed along so the iterator
+     * can add boundary characters.
+     * @return A new InlineCharIterator over this object.
+     */
+    public CharIterator charIterator() {
+       return new InlineCharIterator(this, propMgr.getBorderAndPadding());
+    }
+
 }
index 6c01d2e7fc84b3583b05d96db7ebfc4198e76551..d5b6d9f4e4c2b54f15691db97366c77719c9a933 100644 (file)
@@ -112,7 +112,7 @@ public class SimplePageMaster extends FObj {
                                           new Rectangle(0,0,
                                                         pageWidth,pageHeight)));
 
-        _regions = null;
+       //  _regions = null; // PageSequence access SimplePageMaster....
         children = null;
         properties = null;
     }