/* * $Id$ * ============================================================================ * The Apache Software License, Version 1.1 * ============================================================================ * * Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved. * * Redistribution and use in source and binary forms, with or without modifica- * tion, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The end-user documentation included with the redistribution, if any, must * include the following acknowledgment: "This product includes software * developed by the Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, if * and wherever such third-party acknowledgments normally appear. * * 4. The names "FOP" and "Apache Software Foundation" must not be used to * endorse or promote products derived from this software without prior * written permission. For written permission, please contact * apache@apache.org. * * 5. Products derived from this software may not be called "Apache", nor may * "Apache" appear in their name, without prior written permission of the * Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- * DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ============================================================================ * * This software consists of voluntary contributions made by many individuals * on behalf of the Apache Software Foundation and was originally created by * James Tauber . For more information on the Apache * Software Foundation, please see . */ package org.apache.fop.util; import org.apache.fop.fonts.Font; /** * This class provides utilities to distinguish various kinds of Unicode * whitespace and to get character widths in a given FontState. */ public class CharUtilities { /** * Character code used to signal a character boundary in * inline content, such as an inline with borders and padding * or a nested block object. */ public static final char CODE_EOT = 0; /** * Character class: Unicode white space */ public static final int UCWHITESPACE = 0; /** * Character class: Line feed */ public static final int LINEFEED = 1; /** * Character class: Boundary between text runs */ public static final int EOT = 2; /** * Character class: non-whitespace */ public static final int NONWHITESPACE = 3; /** * Character class: XML whitespace */ public static final int XMLWHITESPACE = 4; /** * Utility class: Constructor prevents instantiating when subclassed. */ protected CharUtilities() { throw new UnsupportedOperationException(); } /** * Return the appropriate CharClass constant for the type * of the passed character. * @param c character to inspect * @return the determined character class */ public static int classOf(char c) { if (c == CODE_EOT) { return EOT; } if (c == '\n') { return LINEFEED; } if (c == ' ' || c == '\r' || c == '\t') { return XMLWHITESPACE; } if (isAnySpace(c)) { return UCWHITESPACE; } return NONWHITESPACE; } /** * Helper method for getting the width of a unicode char * from the current fontstate. * This also performs some guessing on widths on various * versions of space that might not exists in the font. * @param c character to inspect * @param fs FontState to use * @return the width of the character */ public static int getCharWidth(char c, Font fs) { int width; if ((c == '\n') || (c == '\r') || (c == '\t') || (c == '\u00A0')) { width = getCharWidth(' ', fs); } else { width = fs.getWidth(fs.mapChar(c)); if (width <= 0) { // Estimate the width of spaces not represented in // the font int em = fs.getWidth(fs.mapChar('m')); int en = fs.getWidth(fs.mapChar('n')); if (em <= 0) { em = 500 * fs.getFontSize(); } if (en <= 0) { en = em - 10; } if (c == ' ') { width = em; } if (c == '\u2000') { width = en; } if (c == '\u2001') { width = em; } if (c == '\u2002') { width = em / 2; } if (c == '\u2003') { width = fs.getFontSize(); } if (c == '\u2004') { width = em / 3; } if (c == '\u2005') { width = em / 4; } if (c == '\u2006') { width = em / 6; } if (c == '\u2007') { width = getCharWidth(' ', fs); } if (c == '\u2008') { width = getCharWidth('.', fs); } if (c == '\u2009') { width = em / 5; } if (c == '\u200A') { width = 5; } if (c == '\u200B') { width = 100; } if (c == '\u202F') { width = getCharWidth(' ', fs) / 2; } if (c == '\u3000') { width = getCharWidth(' ', fs) * 2; } } } return width; } /** * Calculates the word width. */ public static int getWordWidth(String word, Font fs) { if (word == null) return 0; int wordLength = word.length(); int width = 0; char[] characters = new char[wordLength]; word.getChars(0, wordLength, characters, 0); for (int i = 0; i < wordLength; i++) { width += getCharWidth(characters[i], fs); } return width; } /** * Helper method to determine if the character is a * space with normal behaviour. Normal behaviour means that * it's not non-breaking. * @param c character to inspect * @return True if the character is a normal space */ public static boolean isSpace(char c) { return (c == ' ' || (c >= '\u2000' && c <= '\u200B')); // c == '\u2000' // en quad // c == '\u2001' // em quad // c == '\u2002' // en space // c == '\u2003' // em space // c == '\u2004' // three-per-em space // c == '\u2005' // four--per-em space // c == '\u2006' // six-per-em space // c == '\u2007' // figure space // c == '\u2008' // punctuation space // c == '\u2009' // thin space // c == '\u200A' // hair space // c == '\u200B' // zero width space } /** * Method to determine if the character is a nonbreaking * space. * @param c character to check * @return True if the character is a nbsp */ public static boolean isNBSP(char c) { return (c == '\u00A0' // no-break space || c == '\u202F' // narrow no-break space || c == '\u3000' // ideographic space || c == '\uFEFF'); // zero width no-break space } /** * Determines if the character represents any kind of space. * @param c character to check * @return True if the character represents any kind of space */ public static boolean isAnySpace(char c) { boolean ret = (isSpace(c) || isNBSP(c)); return ret; } }