/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.fo; import java.util.List; import java.util.Stack; import org.apache.fop.fo.flow.Block; import org.apache.fop.util.CharUtilities; /** * Class encapsulating the functionality for white-space-handling * during refinement stage. * The handleWhiteSpace() methods are called during * FOTree-building and marker-cloning: *
* *
* Each time one of the variants is called, white-space is handled * for all FOText or Character nodes that * were added: *
* *
* The iteration always starts at firstTextNode, * goes on until the last text-node is reached, and deals only * with FOText or Character nodes. *
* Note: if the method is called from an inline's endOfNode(), * there is too little context to decide whether trailing * white-space may be removed, so the pending inline is stored * in a List, together with an iterator for which the next() * method returns the first in the trailing sequence of white- * space characters. This List is processed again at the end * of the ancestor block. */ public class XMLWhiteSpaceHandler { /** True if we are in a run of white space */ private boolean inWhiteSpace = false; /** True if the last char was a linefeed */ private boolean afterLinefeed = true; /** Counter, increased every time a non-white-space is encountered */ private int nonWhiteSpaceCount; private int linefeedTreatment; private int whiteSpaceTreatment; private int whiteSpaceCollapse; private boolean endOfBlock; private boolean nextChildIsBlockLevel; private RecursiveCharIterator charIter; private List pendingInlines; private Stack nestedBlockStack = new java.util.Stack(); private CharIterator firstWhiteSpaceInSeq; /** * Handle white-space for the fo that is passed in, starting at * firstTextNode * @param fo the FO for which to handle white-space * @param firstTextNode the node at which to start * @param nextChild the node that will be added to the list * after firstTextNode */ public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode, FONode nextChild) { Block currentBlock = null; int foId = fo.getNameId(); /* set the current block */ switch (foId) { case Constants.FO_BLOCK: currentBlock = (Block) fo; if (nestedBlockStack.empty() || fo != nestedBlockStack.peek()) { if (nextChild != null) { /* if already in a block, push the current block * onto the stack of nested blocks */ nestedBlockStack.push(currentBlock); } } else { if (nextChild == null) { nestedBlockStack.pop(); } } break; case Constants.FO_RETRIEVE_MARKER: /* look for the nearest block ancestor, if any */ FONode ancestor = fo; do { ancestor = ancestor.getParent(); } while (ancestor.getNameId() != Constants.FO_BLOCK && ancestor.getNameId() != Constants.FO_STATIC_CONTENT); if (ancestor.getNameId() == Constants.FO_BLOCK) { currentBlock = (Block) ancestor; nestedBlockStack.push(currentBlock); } break; default: if (!nestedBlockStack.empty()) { currentBlock = (Block) nestedBlockStack.peek(); } } if (currentBlock != null) { linefeedTreatment = currentBlock.getLinefeedTreatment(); whiteSpaceCollapse = currentBlock.getWhitespaceCollapse(); whiteSpaceTreatment = currentBlock.getWhitespaceTreatment(); } else { linefeedTreatment = Constants.EN_TREAT_AS_SPACE; whiteSpaceCollapse = Constants.EN_TRUE; whiteSpaceTreatment = Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED; } endOfBlock = (nextChild == null && fo == currentBlock); if (firstTextNode == null) { //no text means no white-space to handle; return early afterLinefeed = (fo == currentBlock && fo.firstChild == null); nonWhiteSpaceCount = 0; if (endOfBlock) { handlePendingInlines(); } return; } charIter = new RecursiveCharIterator(fo, firstTextNode); inWhiteSpace = false; if (fo == currentBlock || currentBlock == null || (foId == Constants.FO_RETRIEVE_MARKER && fo.getParent() == currentBlock)) { if (firstTextNode == fo.firstChild) { afterLinefeed = true; } else { int previousChildId = firstTextNode.siblings[0].getNameId(); afterLinefeed = (previousChildId == Constants.FO_BLOCK || previousChildId == Constants.FO_TABLE_AND_CAPTION || previousChildId == Constants.FO_TABLE || previousChildId == Constants.FO_LIST_BLOCK || previousChildId == Constants.FO_BLOCK_CONTAINER); } } if (foId == Constants.FO_WRAPPER) { FONode parent = fo.parent; int parentId = parent.getNameId(); while (parentId == Constants.FO_WRAPPER) { parent = parent.parent; parentId = parent.getNameId(); } if (parentId == Constants.FO_FLOW || parentId == Constants.FO_STATIC_CONTENT || parentId == Constants.FO_BLOCK_CONTAINER || parentId == Constants.FO_TABLE_CELL) { endOfBlock = (nextChild == null); } } if (nextChild != null) { int nextChildId = nextChild.getNameId(); nextChildIsBlockLevel = ( nextChildId == Constants.FO_BLOCK || nextChildId == Constants.FO_TABLE_AND_CAPTION || nextChildId == Constants.FO_TABLE || nextChildId == Constants.FO_LIST_BLOCK || nextChildId == Constants.FO_BLOCK_CONTAINER); } else { nextChildIsBlockLevel = false; } handleWhiteSpace(); if (fo == currentBlock && (endOfBlock || nextChildIsBlockLevel)) { handlePendingInlines(); } if (nextChild == null) { if (fo != currentBlock) { /* current FO is not a block, and is about to end */ if (nonWhiteSpaceCount > 0 && pendingInlines != null) { /* there is non-white-space text between the pending * inline(s) and the end of the non-block node; * clear list of pending inlines */ pendingInlines.clear(); } if (inWhiteSpace) { /* means there is at least one trailing space in the inline FO that is about to end */ addPendingInline(fo); } } else { /* end of block: clear the references and pop the * nested block stack */ if (!nestedBlockStack.empty()) { nestedBlockStack.pop(); } charIter = null; firstWhiteSpaceInSeq = null; } } } /** * Reset the handler, release all references */ protected final void reset() { if (pendingInlines != null) { pendingInlines.clear(); } nestedBlockStack.clear(); charIter = null; firstWhiteSpaceInSeq = null; } /** * Handle white-space for the fo that is passed in, starting at * firstTextNode (when a nested FO is encountered) * @param fo the FO for which to handle white-space * @param firstTextNode the node at which to start */ public void handleWhiteSpace(FObjMixed fo, FONode firstTextNode) { handleWhiteSpace(fo, firstTextNode, null); } private void handleWhiteSpace() { EOLchecker lfCheck = new EOLchecker(charIter); nonWhiteSpaceCount = 0; while (charIter.hasNext()) { if (!inWhiteSpace) { firstWhiteSpaceInSeq = charIter.mark(); } char currentChar = charIter.nextChar(); int currentCharClass = CharUtilities.classOf(currentChar); if (currentCharClass == CharUtilities.LINEFEED && linefeedTreatment == Constants.EN_TREAT_AS_SPACE) { // if we have a linefeed and it is supposed to be treated // like a space, that's what we do and continue currentChar = '\u0020'; charIter.replaceChar('\u0020'); currentCharClass = CharUtilities.classOf(currentChar); } switch (CharUtilities.classOf(currentChar)) { case CharUtilities.XMLWHITESPACE: // Some kind of whitespace character, except linefeed. if (inWhiteSpace && whiteSpaceCollapse == Constants.EN_TRUE) { // We are in a run of whitespace and should collapse // Just delete the char charIter.remove(); } else { // Do the white space treatment here boolean bIgnore = false; switch (whiteSpaceTreatment) { case Constants.EN_IGNORE: bIgnore = true; break; case Constants.EN_IGNORE_IF_BEFORE_LINEFEED: bIgnore = lfCheck.beforeLinefeed(); break; case Constants.EN_IGNORE_IF_SURROUNDING_LINEFEED: bIgnore = afterLinefeed || lfCheck.beforeLinefeed(); break; case Constants.EN_IGNORE_IF_AFTER_LINEFEED: bIgnore = afterLinefeed; break; case Constants.EN_PRESERVE: //nothing to do now, replacement takes place later break; default: //nop } // Handle ignore and replacement if (bIgnore) { charIter.remove(); } else { // this is to retain a single space between words inWhiteSpace = true; if (currentChar != '\u0020') { charIter.replaceChar('\u0020'); } } } break; case CharUtilities.LINEFEED: // A linefeed switch (linefeedTreatment) { case Constants.EN_IGNORE: charIter.remove(); break; case Constants.EN_TREAT_AS_ZERO_WIDTH_SPACE: charIter.replaceChar(CharUtilities.ZERO_WIDTH_SPACE); inWhiteSpace = false; break; case Constants.EN_PRESERVE: lfCheck.reset(); inWhiteSpace = false; afterLinefeed = true; // for following whitespace break; default: //nop } break; case CharUtilities.EOT: // A "boundary" objects such as non-character inline // or nested block object was encountered. (? can't happen) // If any whitespace run in progress, finish it. // FALL THROUGH default: // Any other character inWhiteSpace = false; afterLinefeed = false; nonWhiteSpaceCount++; lfCheck.reset(); break; } } } private void addPendingInline(FObjMixed fo) { if (pendingInlines == null) { pendingInlines = new java.util.ArrayList(5); } pendingInlines.add(new PendingInline(fo, firstWhiteSpaceInSeq)); } private void handlePendingInlines() { if (!(pendingInlines == null || pendingInlines.isEmpty())) { if (nonWhiteSpaceCount == 0) { /* handle white-space for all pending inlines*/ PendingInline p; for (int i = pendingInlines.size(); --i >= 0;) { p = (PendingInline)pendingInlines.get(i); charIter = (RecursiveCharIterator)p.firstTrailingWhiteSpace; handleWhiteSpace(); pendingInlines.remove(p); } } else { /* there is non-white-space text between the pending * inline(s) and the end of the block; * clear list of pending inlines */ pendingInlines.clear(); } } } /** * Helper class, used during white-space handling to look ahead, and * see if the next character is a linefeed (or if there will be * an equivalent effect during layout, i.e. end-of-block or * the following child is a block-level FO) */ private class EOLchecker { private boolean nextIsEOL = false; private RecursiveCharIterator charIter; EOLchecker(CharIterator charIter) { this.charIter = (RecursiveCharIterator) charIter; } boolean beforeLinefeed() { if (!nextIsEOL) { CharIterator lfIter = charIter.mark(); while (lfIter.hasNext()) { int charClass = CharUtilities.classOf(lfIter.nextChar()); if (charClass == CharUtilities.LINEFEED) { if (linefeedTreatment == Constants.EN_PRESERVE) { nextIsEOL = true; return nextIsEOL; } } else if (charClass != CharUtilities.XMLWHITESPACE) { return nextIsEOL; } } // No more characters == end of text run // means EOL if there either is a nested block to be added, // or if this is the last text node in the current block nextIsEOL = nextChildIsBlockLevel || endOfBlock; } return nextIsEOL; } void reset() { nextIsEOL = false; } } /** * Helper class to store unfinished inline nodes together * with an iterator that starts at the first white-space * character in the sequence of trailing white-space */ private class PendingInline { protected FObjMixed fo; protected CharIterator firstTrailingWhiteSpace; PendingInline(FObjMixed fo, CharIterator firstTrailingWhiteSpace) { this.fo = fo; this.firstTrailingWhiteSpace = firstTrailingWhiteSpace; } } }