diff options
author | fotis <fotis@unknown> | 2000-12-12 19:18:50 +0000 |
---|---|---|
committer | fotis <fotis@unknown> | 2000-12-12 19:18:50 +0000 |
commit | c6eaec4c8bb1a18dc909bc65c35ede58dd0fcfee (patch) | |
tree | 79443dabcc441bf86b8edb6e560773652b055538 /src/org | |
parent | 5668bdab96cbfac4b3c6148a48d81986322280f2 (diff) | |
download | xmlgraphics-fop-c6eaec4c8bb1a18dc909bc65c35ede58dd0fcfee.tar.gz xmlgraphics-fop-c6eaec4c8bb1a18dc909bc65c35ede58dd0fcfee.zip |
adds hyphenation (Carlos Villegas, Fotis Jannidis) and simple configuration support (F.J.)
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@193861 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/org')
24 files changed, 3935 insertions, 170 deletions
diff --git a/src/org/apache/fop/apps/CommandLine.java b/src/org/apache/fop/apps/CommandLine.java index d817f2587..3918bc039 100644 --- a/src/org/apache/fop/apps/CommandLine.java +++ b/src/org/apache/fop/apps/CommandLine.java @@ -66,11 +66,12 @@ import java.io.PrintWriter; import java.io.IOException; import java.io.FileNotFoundException; import java.net.URL; +import java.io.InputStream; + // FOP import org.apache.fop.messaging.MessageHandler; - /** * mainline class. * @@ -82,6 +83,8 @@ public class CommandLine { private String foFile = null; private String pdfFile = null; + private String userConfigFile = null; + private String baseDir = null; /** show a full dump on error */ private static boolean errorDump = false; @@ -90,6 +93,8 @@ public class CommandLine { for (int i = 0; i < args.length; i++) { if (args[i].equals("-d") || args[i].equals("--full-error-dump")) { errorDump = true; + } else if ((args[i].charAt(0) == '-') && (args[i].charAt(1) == 'c')) { + userConfigFile = args[i].substring(2); } else if (args[i].charAt(0) == '-') { printUsage(args[i]); } else if (foFile == null) { @@ -108,16 +113,27 @@ public class CommandLine { public void printUsage(String arg) { if (arg != null) { MessageHandler.errorln("Unkown argument: '"+arg + "'"); - MessageHandler.errorln("Usage: java [-d]" + + MessageHandler.errorln("Usage: java [-d] " + + "[-cMyConfigFile] " + "org.apache.fop.apps.CommandLine " + "formatting-object-file pdf-file"); MessageHandler.errorln("Options:\n" + " -d or --full-error-dump Show stack traces upon error"); + MessageHandler.errorln("-cMyConfigFile use values in configuration file MyConfigFile instead of default"); System.exit(1); } } - public void run() { + Driver driver = new Driver(); + driver.loadStandardConfiguration("standard"); +// driver.loadStandardConfiguration("pdf"); + if (userConfigFile != null) { + driver.loadUserconfiguration(userConfigFile,"standard"); + } + driver.setBaseDir(foFile); + + String version = Version.getVersion(); + 
MessageHandler.logln(version); XMLReader parser = createParser(); @@ -140,7 +156,6 @@ public class CommandLine { } try { - Driver driver = new Driver(); driver.setErrorDump(errorDump); driver.setRenderer("org.apache.fop.render.pdf.PDFRenderer", Version.getVersion()); @@ -216,7 +231,7 @@ public class CommandLine { * @param filename the name of the file * @return the InputSource created */ - protected static InputSource fileInputSource(String filename) { + public static InputSource fileInputSource(String filename) { /* this code adapted from James Clark's in XT */ File file = new File(filename); @@ -242,9 +257,6 @@ public class CommandLine { * @param command line arguments */ public static void main(String[] args) { - String version = Version.getVersion(); - MessageHandler.errorln(version); - CommandLine cmdLine = new CommandLine(args); cmdLine.run(); diff --git a/src/org/apache/fop/apps/Driver.java b/src/org/apache/fop/apps/Driver.java index 46b922970..efa3192ba 100644 --- a/src/org/apache/fop/apps/Driver.java +++ b/src/org/apache/fop/apps/Driver.java @@ -59,6 +59,9 @@ import org.apache.fop.layout.AreaTree; import org.apache.fop.layout.FontInfo; import org.apache.fop.render.Renderer; import org.apache.fop.messaging.MessageHandler; +import org.apache.fop.configuration.ConfigurationReader; +import org.apache.fop.configuration.StandardConfiguration; + // DOM import org.w3c.dom.Document; @@ -76,6 +79,8 @@ import org.xml.sax.helpers.AttributesImpl; // Java import java.io.PrintWriter; import java.io.IOException; +import java.io.InputStream; +import java.io.File; /** * <P>Primary class that drives overall FOP process. 
@@ -130,7 +135,7 @@ public class Driver { protected PrintWriter writer; /** If true, full error stacks are reported */ - protected boolean errorDump; + protected boolean errorDump = false; /** create a new Driver */ public Driver() { @@ -138,8 +143,8 @@ public class Driver { } /** Set the error dump option - * @param dump if true, full stacks will be reported to the error log - */ + * @param dump if true, full stacks will be reported to the error log + */ public void setErrorDump(boolean dump) { errorDump = dump; } @@ -150,18 +155,18 @@ public class Driver { } /** - * set the class name of the Renderer to use as well as the - * producer string for those renderers that can make use of it - */ + * set the class name of the Renderer to use as well as the + * producer string for those renderers that can make use of it + */ public void setRenderer(String rendererClassName, String producer) { this.renderer = createRenderer(rendererClassName); this.renderer.setProducer(producer); } /** - * protected method used by setRenderer(String, String) to - * instantiate the Renderer class - */ + * protected method used by setRenderer(String, String) to + * instantiate the Renderer class + */ protected Renderer createRenderer(String rendererClassName) { MessageHandler.logln("using renderer " + rendererClassName); @@ -185,26 +190,26 @@ public class Driver { } /** - * add the given element mapping. - * - * an element mapping maps element names to Java classes - */ + * add the given element mapping. 
+ * + * an element mapping maps element names to Java classes + */ public void addElementMapping(ElementMapping mapping) { mapping.addToBuilder(this.treeBuilder); } /** - * add the element mapping with the given class name - */ + * add the element mapping with the given class name + */ public void addElementMapping(String mappingClassName) { createElementMapping(mappingClassName).addToBuilder( this.treeBuilder); } /** - * protected method used by addElementMapping(String) to - * instantiate element mapping class - */ + * protected method used by addElementMapping(String) to + * instantiate element mapping class + */ protected ElementMapping createElementMapping( String mappingClassName) { MessageHandler.logln("using element mapping " + mappingClassName); @@ -233,16 +238,16 @@ public class Driver { } /** - * add the element mapping with the given class name - */ + * add the element mapping with the given class name + */ public void addPropertyList(String listClassName) { createPropertyList(listClassName).addToBuilder(this.treeBuilder); } /** - * protected method used by addPropertyList(String) to - * instantiate list mapping class - */ + * protected method used by addPropertyList(String) to + * instantiate list mapping class + */ protected PropertyListMapping createPropertyList( String listClassName) { MessageHandler.logln("using property list mapping " + @@ -272,20 +277,20 @@ public class Driver { } /** - * return the tree builder (a SAX DocumentHandler). - * - * used in situations where SAX is used but not via a FOP-invoked - * SAX parser. A good example is an XSLT engine that fires SAX - * events but isn't a SAX Parser itself. - */ + * return the tree builder (a SAX DocumentHandler). + * + * used in situations where SAX is used but not via a FOP-invoked + * SAX parser. A good example is an XSLT engine that fires SAX + * events but isn't a SAX Parser itself. 
+ */ public ContentHandler getContentHandler() { return this.treeBuilder; } /** - * build the formatting object tree using the given SAX Parser and - * SAX InputSource - */ + * build the formatting object tree using the given SAX Parser and + * SAX InputSource + */ public void buildFOTree(XMLReader parser, InputSource source) throws FOPException { @@ -308,8 +313,8 @@ public class Driver { } /** - * build the formatting object tree using the given DOM Document - */ + * build the formatting object tree using the given DOM Document + */ public void buildFOTree(Document document) throws FOPException { /* most of this code is modified from John Cowan's */ @@ -337,7 +342,7 @@ public class Driver { int datalen = data.length(); if (array == null || array.length < datalen) { /* if the array isn't big enough, make a new - one */ + one */ array = new char[datalen]; } data.getChars(0, datalen, array, 0); @@ -399,8 +404,8 @@ public class Driver { } /** - * Dumps an error - */ + * Dumps an error + */ public void dumpError(Exception e) { if (errorDump) { if (e instanceof SAXException) { @@ -417,16 +422,16 @@ public class Driver { /** - * set the PrintWriter to use to output the result of the Renderer - * (if applicable) - */ + * set the PrintWriter to use to output the result of the Renderer + * (if applicable) + */ public void setWriter(PrintWriter writer) { this.writer = writer; } /** - * format the formatting object tree into an area tree - */ + * format the formatting object tree into an area tree + */ public void format() throws FOPException { FontInfo fontInfo = new FontInfo(); this.renderer.setupFontInfo(fontInfo); @@ -438,9 +443,86 @@ public class Driver { } /** - * render the area tree to the output form - */ + * render the area tree to the output form + */ public void render() throws IOException, FOPException { this.renderer.render(areaTree, this.writer); } + + /** + * loads standard configuration file and a user file, if it has been specified + */ + public void 
loadStandardConfiguration(String role) { + String file; + if (role.equals("standard")) { + file = "config.xml"; + } else if (role.equals("pdf")) { + file = "pdf.xml"; + } else if (role.equals("awt")) { + file = "awt.xml"; + } else { + MessageHandler.errorln("Error: unknown configuration role: " + role + + "\n using standard"); + file = "config.xml"; + } + // the entry /conf/config.xml refers to a directory conf which is a sibling of org + InputStream configfile = + ConfigurationReader.class.getResourceAsStream("/conf/"+file); + if (configfile == null) { + MessageHandler.errorln("Fatal error: can't find default configuration file"); + System.exit(1); + } + MessageHandler.logln("reading default configuration file"); + ConfigurationReader reader = + new ConfigurationReader (new InputSource(configfile), + "standard"); + if (errorDump) { + reader.setDumpError(true); + } + try { + reader.start(); + } catch (org.apache.fop.apps.FOPException error) { + MessageHandler.errorln("Fatal Error: Can't process default configuration file. 
\nProbably it is not well-formed."); + if (errorDump) { + reader.dumpError(error); + } + System.exit(1); + } + } + + public void loadUserconfiguration(String userConfigFile, String role) { + //read user configuration file + if (userConfigFile != null) { + MessageHandler.logln("reading user configuration file"); + ConfigurationReader reader = new ConfigurationReader ( + CommandLine.fileInputSource(userConfigFile), role); + if (errorDump) { + reader.setDumpError(true); + } + try { + reader.start(); + } catch (org.apache.fop.apps.FOPException error) { + MessageHandler.errorln( + "Can't find user configuration file " + + userConfigFile); + MessageHandler.errorln("using default values"); + if (errorDump) { + reader.dumpError(error); + } + } + } + } + + public void setBaseDir(String fofile) { + String baseDir = StandardConfiguration.getStringValue("baseDir"); + if (baseDir == null) { + baseDir = new File(new File(fofile).getAbsolutePath()).getParent(); + StandardConfiguration.put("baseDir",baseDir); + } + if (errorDump) { + MessageHandler.logln("base directory: " + baseDir); + } + } + + } diff --git a/src/org/apache/fop/apps/Version.java b/src/org/apache/fop/apps/Version.java index 2201df324..9af596207 100644 --- a/src/org/apache/fop/apps/Version.java +++ b/src/org/apache/fop/apps/Version.java @@ -51,6 +51,7 @@ package org.apache.fop.apps; +import org.apache.fop.configuration.StandardConfiguration; /** * class representing the version of FOP. 
*/ @@ -62,6 +63,6 @@ public class Version { * @return the version string */ public static String getVersion() { - return "FOP 0.15"; + return StandardConfiguration.getStringValue("version"); } } diff --git a/src/org/apache/fop/apps/XalanCommandLine.java b/src/org/apache/fop/apps/XalanCommandLine.java index a82c41859..9fec8a7d6 100644 --- a/src/org/apache/fop/apps/XalanCommandLine.java +++ b/src/org/apache/fop/apps/XalanCommandLine.java @@ -68,6 +68,7 @@ import java.io.FileWriter; import java.io.PrintWriter; import java.io.BufferedWriter; import java.io.IOException; +import java.io.InputStream; import java.io.FileNotFoundException; import java.net.URL; @@ -80,7 +81,7 @@ import org.apache.xalan.xslt.XSLTResultTarget; // FOP import org.apache.fop.messaging.MessageHandler; - +import org.apache.fop.configuration.ConfigurationReader; /** * mainline class. @@ -91,6 +92,11 @@ import org.apache.fop.messaging.MessageHandler; */ public class XalanCommandLine { + private String userConfigFile = null; + + /** show a full dump on error */ //this should be implemented here too + private static boolean errorDump = false; + /** * creates a SAX parser, using the value of org.xml.sax.parser * defaulting to org.apache.xerces.parsers.SAXParser @@ -150,6 +156,7 @@ public class XalanCommandLine { } } + /** * mainline method * @@ -160,16 +167,26 @@ public class XalanCommandLine { * @param command line arguments */ public static void main(String[] args) { - String version = Version.getVersion(); - MessageHandler.logln(version); - - if (args.length != 3) { MessageHandler.errorln("usage: java " + "org.apache.fop.apps.XalanCommandLine " + "xml-file xslt-file pdf-file"); System.exit(1); } + Driver driver = new Driver(); + driver.loadStandardConfiguration("standard"); +//must be redone like CommandLine +/* if (userConfigFile != null) { + driver.loadUserconfiguration(userConfigFile,"standard"); + } +*/ + driver.setBaseDir(args[0]); + + String version = Version.getVersion(); + 
MessageHandler.logln(version); + + + XMLReader parser = createParser(); if (parser == null) { @@ -238,7 +255,6 @@ public class XalanCommandLine { writer.close(); //set Driver methods to start Fop processing - Driver driver = new Driver(); driver.setRenderer("org.apache.fop.render.pdf.PDFRenderer", version); driver.addElementMapping("org.apache.fop.fo.StandardElementMapping"); diff --git a/src/org/apache/fop/configuration/AWTConfiguration.java b/src/org/apache/fop/configuration/AWTConfiguration.java new file mode 100644 index 000000000..330e51979 --- /dev/null +++ b/src/org/apache/fop/configuration/AWTConfiguration.java @@ -0,0 +1,141 @@ +package org.apache.fop.configuration; + +import java.util.Vector; +import java.util.Hashtable; +import java.util.Enumeration; + +public class AWTConfiguration { + + /** stores the configuration information */ + private static Hashtable configuration; + + + /** + * general access method + * + * @param key a string containing the key value for the configuration value + * @return Object containing the value; normally you would use one of the + * convenience methods, which return the correct form. + * null if the key is not defined. + */ + public static Object getValue (String key){ + return configuration.get(key); + }; + + /** + * convenience methods to access strings values in the configuration + * @param key a string containing the key value for the configuration value + * @return String a string containing the value + * null if the key is not defined. + */ + public static String getStringValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return (String) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access int values in the configuration + * @param key a string containing the key value for the configuration value + * @return int a int containing the value + * -1 if the key is not defined. 
+ */ + public static int getIntValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return Integer.parseInt((String) obj); + } else { + return -1; + } + }; + + /** + * convenience methods to access list values in the configuration + * @param key a string containing the key value for the configuration value + * @return Vector a Vector containing the values + * null if the key is not defined. + */ + public static Vector getListValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Vector) { + return (Vector) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access map/hashtable values in the configuration + * @param key a string containing the key value for the configuration value + * @return Hashtable a Hashtable containing the values + * null if the key is not defined. + */ + public static Hashtable getHashtableValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Hashtable) { + return (Hashtable) obj; + } else { + return null; + } + }; + + /** + * adds information to the configuration map/hashtable in key,value form + * @param key a string containing the key value for the configuration value + * @param value an Object containing the value; can be a String, a Vector or a Hashtable + */ + public static void put(String key,Object value){ + configuration.put(key,value); + }; + + /** + * debug methods, which writes out all information in this configuration + */ + public static void dumpConfiguration() { + String key; + Object value; + Vector list; + Hashtable map; + Enumeration enum; + String tmp; + System.out.println("Dumping standard configuration: "); + Enumeration enumeration = configuration.keys(); + while (enumeration.hasMoreElements()) { + key = (String) enumeration.nextElement(); + System.out.print(" key: " + key); + value = configuration.get(key); + if (value instanceof String) { + System.out.println(" value: " + value); + } else if (value instanceof 
Vector) { + list = (Vector) value; + enum = list.elements(); + System.out.print(" value: "); + while (enum.hasMoreElements()) { + System.out.print( enum.nextElement() + " - "); + } + System.out.println(""); + } else if (value instanceof Hashtable) { + map = (Hashtable) value; + enum = map.keys(); + while (enum.hasMoreElements()) { + tmp = (String) enum.nextElement(); + System.out.print(" " + tmp + ":" + map.get(tmp)); + } + System.out.println(""); + } + } + + } + + /** + * initializes this configuration + * @param config contains the configuration information + */ + public static void setup(Hashtable config){ + configuration = config; + } + +} diff --git a/src/org/apache/fop/configuration/ConfigurationParser.java b/src/org/apache/fop/configuration/ConfigurationParser.java new file mode 100644 index 000000000..e99a6cd45 --- /dev/null +++ b/src/org/apache/fop/configuration/ConfigurationParser.java @@ -0,0 +1,202 @@ +/* + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." 
+ Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.configuration; + +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.Attributes; +import org.xml.sax.Locator; +import java.util.Hashtable; +import java.util.Vector; + +import org.apache.fop.messaging.MessageHandler; + +/** + * SAX2 Handler which parses the events and stores them in a Configuration. + * Normally this class must not accessed directly. 
+ */ + +public class ConfigurationParser extends DefaultHandler { + private final int OUT = 0; + private final int IN_ENTRY = 1; + private final int IN_KEY = 2; + private final int IN_VALUE = 4; + private final int IN_LIST = 8; + private final int IN_SUBENTRY = 16; + private final int IN_SUBKEY = 32; + + private final int STRING = 0; + private final int LIST = 1; + private final int MAP = 2; + + //state of parser + private int status = OUT; + private int datatype = -1; + + //stores the result configuration + private static Hashtable configuration = new Hashtable(20); + + //stores key for new config entry + private String key = ""; + + //stores string value + private String value = ""; + + //stores key for new config entry + private String subkey = ""; + + //stores list value + private Vector list = new Vector (15); + + //stores hashtable value + private Hashtable map = new Hashtable(15); + + /** locator for line number information */ + private Locator locator; + + + /** get locator for position information */ + public void setDocumentLocator(Locator locator) { + this.locator = locator; + } + + /** + * extracts the element and attribute name and sets the fitting status and datatype values + * */ + public void startElement(String uri, String localName, + String qName, Attributes attributes) { + if (localName.equals("key")) { + status += IN_KEY; + } else if (localName.equals("value")) { + status += IN_VALUE; + } else if (localName.equals("list")) { + status += IN_LIST; + } else if (localName.equals("subentry")) { + status += IN_SUBENTRY; + } else if (localName.equals("configuration") || + localName.equals("entry") || localName.equals("datatype")) { + } else { + //to make sure that user knows about false tag + MessageHandler.errorln( + "Unknown tag in configuration file: " + localName); + } + } //end startElement + + /** + * stores subentries or entries into their hashes (map for subentries, configuration for entry) + */ + public void endElement(String uri, String 
localName, String qName) { + if (localName.equals("entry")) { + int tek = 0; + switch (datatype) { + case STRING: + configuration.put(key, value); + break; + case LIST: + configuration.put(key, list); + break; + case MAP: + configuration.put(key, map); + } + status = OUT; + } else if (localName.equals("subentry")) { + map.put(subkey, value); + status -= IN_SUBENTRY; + } else if (localName.equals("key")) { + status -= IN_KEY; + } else if (localName.equals("list")) { + status -= IN_LIST; + } else if (localName.equals("value")) { + status -= IN_VALUE; + } + + } + + /** + * extracts characters from text nodes and puts them into their respective + * variables + */ + public void characters(char[] ch, int start, int length) { + char characters [] = new char [length]; + System.arraycopy(ch, start, characters, 0, length); + String text = new String(characters); + switch (status) { + case IN_KEY: + key = text; + break; + case IN_LIST + IN_SUBENTRY + IN_KEY: + subkey = text; + break; + case IN_VALUE: + value = text; + datatype = STRING; + break; + case IN_LIST + IN_VALUE: + list.add(text); + datatype = LIST; + break; + case IN_LIST + IN_SUBENTRY + IN_VALUE: + value = text; + datatype = MAP; + break; + } + + } //end characters + + /** + * returns the parsed configuration information + * @return Hashtable containing the configuration information as key/value pairs + */ + public Hashtable getConfiguration() { + return configuration; + } + +} diff --git a/src/org/apache/fop/configuration/ConfigurationReader.java b/src/org/apache/fop/configuration/ConfigurationReader.java new file mode 100644 index 000000000..798a35698 --- /dev/null +++ b/src/org/apache/fop/configuration/ConfigurationReader.java @@ -0,0 +1,227 @@ +/* + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. 
All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.configuration; + +//sax +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.XMLReader; +import org.xml.sax.SAXException; +import java.io.IOException; +import org.xml.sax.InputSource; + +//fop +import org.apache.fop.messaging.MessageHandler; +import org.apache.fop.apps.FOPException; +import org.apache.fop.configuration.AWTConfiguration; +import org.apache.fop.configuration.PDFConfiguration; +import org.apache.fop.configuration.StandardConfiguration; + +/** + * entry class for reading configuration from file and creating a configuration + * class. typical use looks like that: <br> + * + * <code>ConfigurationReader reader = new ConfigurationReader ("config.xml","standard"); + * try { + * reader.start(); + * } catch (org.apache.fop.apps.FOPException error) { + * reader.dumpError(error); + * } + * </code> + * Once the configuration has been setup, the information can be accessed with + * the methods of StandardConfiguration. 
+ */ + +public class ConfigurationReader { + /** show a full dump on error */ + private static boolean errorDump = false; + + /** inputsource for configuration file */ + private InputSource filename; + + private String role ; + + /** + * creates a configuration reader + * @param filename the file which contains the configuration information + * @param role three values are recognized: awt, pdf, standard + */ + public ConfigurationReader (InputSource filename, String role) { + this.filename = filename; + this.role = role; + } + + + /** + * intantiates parser and starts parsing of config file + */ + public void start () throws FOPException { + XMLReader parser = createParser(); + + if (parser == null) { + MessageHandler.errorln("ERROR: Unable to create SAX parser"); + System.exit(1); + } + + // setting the parser features + try { + parser.setFeature("http://xml.org/sax/features/namespace-prefixes", + false); + } catch (SAXException e) { + MessageHandler.errorln("You need a parser which supports SAX version 2"); + if (errorDump) { + e.printStackTrace(); + } + System.exit(1); + } + ConfigurationParser configurationParser = new ConfigurationParser(); + parser.setContentHandler(configurationParser); + + try { + parser.parse(filename); + if (role.equalsIgnoreCase("standard")) { + StandardConfiguration.setup( + configurationParser.getConfiguration()); + } else if (role.equalsIgnoreCase("pdf")) { + PDFConfiguration.setup( + configurationParser.getConfiguration()); + } else if (role.equalsIgnoreCase("awt")) { + AWTConfiguration.setup( + configurationParser.getConfiguration()); + } + } catch (SAXException e) { + if (e.getException() instanceof FOPException) { + dumpError(e.getException()); + throw (FOPException) e.getException(); + } else { + dumpError(e); + throw new FOPException(e.getMessage()); + } + } + catch (IOException e) { + dumpError(e); + throw new FOPException(e.getMessage()); + } + } + + + /** + * creates a SAX parser, using the value of org.xml.sax.parser + * 
defaulting to org.apache.xerces.parsers.SAXParser + * + * @return the created SAX parser + */ + static XMLReader createParser() { + String parserClassName = System.getProperty("org.xml.sax.parser"); + if (parserClassName == null) { + parserClassName = "org.apache.xerces.parsers.SAXParser"; + } + if (errorDump) { + MessageHandler.logln( "configuration reader using SAX parser " + + parserClassName); + } + + try { + return (XMLReader) Class.forName( + parserClassName).newInstance(); + } catch (ClassNotFoundException e) { + MessageHandler.errorln("Could not find " + parserClassName); + if (errorDump) { + e.printStackTrace(); + } + } + catch (InstantiationException e) { + MessageHandler.errorln("Could not instantiate " + + parserClassName); + if (errorDump) { + e.printStackTrace(); + } + } + catch (IllegalAccessException e) { + MessageHandler.errorln("Could not access " + parserClassName); + if (errorDump) { + e.printStackTrace(); + } + } + catch (ClassCastException e) { + MessageHandler.errorln(parserClassName + " is not a SAX driver"); + if (errorDump) { + e.printStackTrace(); + } + } + return null; + } + + /** + * Dumps an error + */ + public void dumpError(Exception e) { + if (errorDump) { + if (e instanceof SAXException) { + e.printStackTrace(); + if (((SAXException) e).getException() != null) { + ((SAXException) e).getException().printStackTrace(); + } + } else { + e.printStackTrace(); + } + } + } + + /** + * long or short error messages + * + */ + public void setDumpError(boolean dumpError) { + this.errorDump = errorDump; + } + +} diff --git a/src/org/apache/fop/configuration/PDFConfiguration.java b/src/org/apache/fop/configuration/PDFConfiguration.java new file mode 100644 index 000000000..ae62719bc --- /dev/null +++ b/src/org/apache/fop/configuration/PDFConfiguration.java @@ -0,0 +1,147 @@ +package org.apache.fop.configuration; + +import java.util.Vector; +import java.util.Hashtable; +import java.util.Enumeration; + +/** + * a configuration class for 
information related to the pdf renderer. All configuration is stored + * in key / value pairs. The value can be a String, a list of Strings + * or a map, containing a list of key / value pairs. + * + */ + +public class PDFConfiguration { + + /** stores the configuration information */ + private static Hashtable configuration; + + /** + * general access method + * + * @param key a string containing the key value for the configuration value + * @return Object containing the value; normally you would use one of the + * convenience methods, which return the correct form. + * null if the key is not defined. + */ + public static Object getValue (String key){ + return configuration.get(key); + }; + + /** + * convenience methods to access strings values in the configuration + * @param key a string containing the key value for the configuration value + * @return String a string containing the value + * null if the key is not defined. + */ + public static String getStringValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return (String) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access int values in the configuration + * @param key a string containing the key value for the configuration value + * @return int a int containing the value + * -1 if the key is not defined. + */ + public static int getIntValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return Integer.parseInt((String) obj); + } else { + return -1; + } + }; + + /** + * convenience methods to access list values in the configuration + * @param key a string containing the key value for the configuration value + * @return Vector a Vector containing the values + * null if the key is not defined. 
+ */ + public static Vector getListValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Vector) { + return (Vector) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access map/hashtable values in the configuration + * @param key a string containing the key value for the configuration value + * @return Hashtable a Hashtable containing the values + * null if the key is not defined. + */ + public static Hashtable getHashtableValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Hashtable) { + return (Hashtable) obj; + } else { + return null; + } + }; + + /** + * adds information to the configuration map/hashtable in key,value form + * @param key a string containing the key value for the configuration value + * @param value an Object containing the value; can be a String, a Vector or a Hashtable + */ + public static void put(String key,Object value){ + configuration.put(key,value); + }; + + /** + * debug methods, which writes out all information in this configuration + */ + public static void dumpConfiguration() { + String key; + Object value; + Vector list; + Hashtable map; + Enumeration enum; + String tmp; + System.out.println("Dumping standard configuration: "); + Enumeration enumeration = configuration.keys(); + while (enumeration.hasMoreElements()) { + key = (String) enumeration.nextElement(); + System.out.print(" key: " + key); + value = configuration.get(key); + if (value instanceof String) { + System.out.println(" value: " + value); + } else if (value instanceof Vector) { + list = (Vector) value; + enum = list.elements(); + System.out.print(" value: "); + while (enum.hasMoreElements()) { + System.out.print( enum.nextElement() + " - "); + } + System.out.println(""); + } else if (value instanceof Hashtable) { + map = (Hashtable) value; + enum = map.keys(); + while (enum.hasMoreElements()) { + tmp = (String) enum.nextElement(); + System.out.print(" " + tmp + ":" + map.get(tmp)); + } + 
System.out.println(""); + } + } + + } + + /** + * initializes this configuration + * @param config contains the configuration information + */ + public static void setup(Hashtable config){ + configuration = config; + } + +} diff --git a/src/org/apache/fop/configuration/StandardConfiguration.java b/src/org/apache/fop/configuration/StandardConfiguration.java new file mode 100644 index 000000000..98a0cf845 --- /dev/null +++ b/src/org/apache/fop/configuration/StandardConfiguration.java @@ -0,0 +1,171 @@ + +package org.apache.fop.configuration; + +import java.util.Vector; +import java.util.Hashtable; +import java.util.Enumeration; + +/** + * a configuration class for all general configuration aspects except those + * related to specific renderers. All configuration is stored + * in key / value pairs. The value can be a String, a list of Strings + * or a map, containing a list of key / value pairs. + * + */ +public class StandardConfiguration { + + /** stores the configuration information */ + private static Hashtable configuration; + + /** + * general access method + * + * @param key a string containing the key value for the configuration value + * @return Object containing the value; normally you would use one of the + * convenience methods, which return the correct form. + * null if the key is not defined. + */ + public static Object getValue (String key){ + return configuration.get(key); + }; + + /** + * convenience methods to access strings values in the configuration + * @param key a string containing the key value for the configuration value + * @return String a string containing the value + * null if the key is not defined. 
+ */ + public static String getStringValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return (String) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access int values in the configuration + * @param key a string containing the key value for the configuration value + * @return int a int containing the value + * -1 if the key is not defined. + */ + public static int getIntValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + return Integer.parseInt((String) obj); + } else { + return -1; + } + }; + + /** + * convenience methods to access boolean values in the configuration + * @param key a string containing the key value for the configuration value + * @return boolean true or false as value + * -1 if the key is not defined. + */ + public static Boolean getBooleanValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof String) { + String value = (String) obj; + if (value.equals("true")) { + return new Boolean(true); + } else if (value.equals("false")) { + return new Boolean(false); + } else { + return null; + } + } else { + return null; + } + }; + + /** + * convenience methods to access list values in the configuration + * @param key a string containing the key value for the configuration value + * @return Vector a Vector containing the values + * null if the key is not defined. + */ + public static Vector getListValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Vector) { + return (Vector) obj; + } else { + return null; + } + }; + + /** + * convenience methods to access map/hashtable values in the configuration + * @param key a string containing the key value for the configuration value + * @return Hashtable a Hashtable containing the values + * null if the key is not defined. 
+ */ + public static Hashtable getHashtableValue(String key){ + Object obj = configuration.get(key); + if (obj instanceof Hashtable) { + return (Hashtable) obj; + } else { + return null; + } + }; + + /** + * adds information to the configuration map/hashtable in key,value form + * @param key a string containing the key value for the configuration value + * @param value an Object containing the value; can be a String, a Vector or a Hashtable + */ + public static void put(String key,Object value){ + configuration.put(key,value); + }; + + /** + * debug methods, which writes out all information in this configuration + */ + public static void dumpConfiguration() { + String key; + Object value; + Vector list; + Hashtable map; + Enumeration enum; + String tmp; + System.out.println("Dumping standard configuration: "); + Enumeration enumeration = configuration.keys(); + while (enumeration.hasMoreElements()) { + key = (String) enumeration.nextElement(); + System.out.print(" key: " + key); + value = configuration.get(key); + if (value instanceof String) { + System.out.println(" value: " + value); + } else if (value instanceof Vector) { + list = (Vector) value; + enum = list.elements(); + System.out.print(" value: "); + while (enum.hasMoreElements()) { + System.out.print( enum.nextElement() + " - "); + } + System.out.println(""); + } else if (value instanceof Hashtable) { + map = (Hashtable) value; + enum = map.keys(); + while (enum.hasMoreElements()) { + tmp = (String) enum.nextElement(); + System.out.print(" " + tmp + ":" + map.get(tmp)); + } + System.out.println(""); + } + } + + } + + /** + * initializes this configuration + * @param config contains the configuration information + */ + public static void setup(Hashtable config){ + configuration = config; + } + +} + diff --git a/src/org/apache/fop/fo/Property.java b/src/org/apache/fop/fo/Property.java index 552bff53e..90394a9d5 100644 --- a/src/org/apache/fop/fo/Property.java +++ b/src/org/apache/fop/fo/Property.java @@ 
-343,6 +343,7 @@ public class Property { public LengthRange getLengthRange() { return null; } public Space getSpace() { return null; } public int getEnum() { return 0; } + public char getCharacter() { return 0;} public Number getNumber() { return null; } diff --git a/src/org/apache/fop/fo/flow/Block.java b/src/org/apache/fop/fo/flow/Block.java index 55348eb60..c2a4fe6fc 100644 --- a/src/org/apache/fop/fo/flow/Block.java +++ b/src/org/apache/fop/fo/flow/Block.java @@ -107,6 +107,13 @@ public class Block extends FObjMixed { int borderRightWidth; int borderRightStyle; + int hyphenate; + char hyphenationChar; + int hyphenationPushCharacterCount; + int hyphenationRemainCharacterCount; + String language; + String country; + BlockArea blockArea; // this may be helpful on other FOs too @@ -223,6 +230,18 @@ public class Block extends FObjMixed { this.blockOrphans = this.properties.get("orphans").getNumber().intValue(); + this.hyphenate = this.properties.get("hyphenate").getEnum(); + this.hyphenationChar = this.properties.get("hyphenation-character").getCharacter(); + this.hyphenationPushCharacterCount = this.properties.get( + "hyphenation-push-character-count").getNumber(). + intValue(); + this.hyphenationRemainCharacterCount = this.properties.get( + "hyphenation-remain-character-count").getNumber(). 
+ intValue(); + this.language = this.properties.get("language").getString(); + this.country = this.properties.get("country").getString(); + + this.id = this.properties.get("id").getString(); if (area instanceof BlockArea) { @@ -306,6 +325,9 @@ public class Block extends FObjMixed { borderBottomWidth, borderRightWidth); blockArea.setBorderColor(borderTopColor, borderLeftColor, borderBottomColor, borderRightColor); + blockArea.setHyphenation(language, country, hyphenate, + hyphenationChar, hyphenationPushCharacterCount, + hyphenationRemainCharacterCount); blockArea.start(); blockArea.setAbsoluteHeight(area.getAbsoluteHeight()); diff --git a/src/org/apache/fop/layout/BlockArea.java b/src/org/apache/fop/layout/BlockArea.java index 214d95c20..3a7eb48c0 100644 --- a/src/org/apache/fop/layout/BlockArea.java +++ b/src/org/apache/fop/layout/BlockArea.java @@ -84,6 +84,14 @@ public class BlockArea extends Area { /* have any line areas been used? */ protected boolean hasLines = false; + /*hyphenation*/ + protected int hyphenate; + protected char hyphenationChar; + protected int hyphenationPushCharacterCount; + protected int hyphenationRemainCharacterCount; + protected String language; + protected String country; + public BlockArea(FontState fontState, int allocationWidth, int maxHeight, int startIndent, int endIndent, int textIndent, int align, int alignLastLine, int lineHeight) { @@ -124,6 +132,10 @@ public class BlockArea extends Area { this.currentLineArea.changeColor(red, green, blue); this.currentLineArea.changeWrapOption(wrapOption); this.currentLineArea.changeWhiteSpaceCollapse(whiteSpaceCollapse); + this.currentLineArea.changeHyphenation(language, country, hyphenate, + hyphenationChar, hyphenationPushCharacterCount, + hyphenationRemainCharacterCount); + if (ls != null) { this.currentLinkSet = ls; @@ -156,7 +168,9 @@ public class BlockArea extends Area { this.currentLineArea.changeColor(red, green, blue); this.currentLineArea.changeWrapOption(wrapOption); 
this.currentLineArea.changeWhiteSpaceCollapse(whiteSpaceCollapse); - + this.currentLineArea.changeHyphenation(language, country, hyphenate, + hyphenationChar, hyphenationPushCharacterCount, + hyphenationRemainCharacterCount); if (ls != null) { this.currentLinkSet = ls; ls.setYOffset(currentHeight); @@ -182,6 +196,9 @@ public class BlockArea extends Area { this.currentLineArea.changeWrapOption(wrapOption); this.currentLineArea.changeWhiteSpaceCollapse( whiteSpaceCollapse); + this.currentLineArea.changeHyphenation(language, country, hyphenate, + hyphenationChar, hyphenationPushCharacterCount, + hyphenationRemainCharacterCount); if (ls != null) { ls.setYOffset(currentHeight); } @@ -299,4 +316,15 @@ public class BlockArea extends Area { return halfLeading; } + public void setHyphenation(String language, String country, int hyphenate, char hyphenationChar, + int hyphenationPushCharacterCount, + int hyphenationRemainCharacterCount) { + this.language = language; + this.country = country; + this.hyphenate = hyphenate; + this.hyphenationChar = hyphenationChar; + this.hyphenationPushCharacterCount = hyphenationPushCharacterCount; + this.hyphenationRemainCharacterCount = hyphenationRemainCharacterCount; + } + } diff --git a/src/org/apache/fop/layout/InlineArea.java b/src/org/apache/fop/layout/InlineArea.java index c445cd0e7..41b3f3a02 100644 --- a/src/org/apache/fop/layout/InlineArea.java +++ b/src/org/apache/fop/layout/InlineArea.java @@ -1,36 +1,36 @@ -/*-- $Id$ -- +/*-- $Id$ -- ============================================================================ The Apache Software License, Version 1.1 ============================================================================ - + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. - + Redistribution and use in source and binary forms, with or without modifica- tion, are permitted provided that the following conditions are met: - + 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - + 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - + 3. The end-user documentation included with the redistribution, if any, must include the following acknowledgment: "This product includes software developed by the Apache Software Foundation (http://www.apache.org/)." Alternately, this acknowledgment may appear in the software itself, if and wherever such third-party acknowledgments normally appear. - + 4. The names "Fop" and "Apache Software Foundation" must not be used to endorse or promote products derived from this software without prior written permission. For written permission, please contact apache@apache.org. - + 5. Products derived from this software may not be called "Apache", nor may "Apache" appear in their name, without prior written permission of the Apache Software Foundation. - + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE @@ -41,12 +41,12 @@ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - + This software consists of voluntary contributions made by many individuals on behalf of the Apache Software Foundation and was originally created by - James Tauber <jtauber@jtauber.com>. For more information on the Apache + James Tauber <jtauber@jtauber.com>. For more information on the Apache Software Foundation, please see <http://www.apache.org/>. 
- + */ package org.apache.fop.layout; @@ -55,7 +55,7 @@ import org.apache.fop.render.Renderer; public class InlineArea extends Area { private String text; - protected String pageNumberId=null; + protected String pageNumberId = null; private float red, green, blue; // Textdecoration @@ -64,33 +64,34 @@ public class InlineArea extends Area { protected boolean lineThrough = false; - public InlineArea(FontState fontState, float red, float green, float blue, String text, int width) { - super(fontState); - this.red = red; - this.green = green; - this.blue = blue; - this.text = text; - this.contentRectangleWidth = width; + public InlineArea(FontState fontState, float red, float green, + float blue, String text, int width) { + super(fontState); + this.red = red; + this.green = green; + this.blue = blue; + this.text = text; + this.contentRectangleWidth = width; } public void render(Renderer renderer) { - renderer.renderInlineArea(this); + renderer.renderInlineArea(this); } public float getBlue() { - return this.blue; + return this.blue; } public float getGreen() { - return this.green; + return this.green; } public float getRed() { - return this.red; + return this.red; } public String getText() { - return this.text; + return this.text; } public String getPageNumberID() { @@ -98,11 +99,11 @@ public class InlineArea extends Area { } public void setUnderlined(boolean ul) { - this.underlined = ul; + this.underlined = ul; } public boolean getUnderlined() { - return this.underlined; + return this.underlined; } } diff --git a/src/org/apache/fop/layout/LineArea.java b/src/org/apache/fop/layout/LineArea.java index a35a2b4bd..63d2ef0d3 100644 --- a/src/org/apache/fop/layout/LineArea.java +++ b/src/org/apache/fop/layout/LineArea.java @@ -51,26 +51,28 @@ package org.apache.fop.layout; +//fop import org.apache.fop.render.Renderer; import org.apache.fop.messaging.MessageHandler; import org.apache.fop.layout.LeaderArea; +import org.apache.fop.datatypes.IDNode; +import 
org.apache.fop.fo.properties.WrapOption; +import org.apache.fop.fo.properties.WhiteSpaceCollapse; +import org.apache.fop.fo.properties.TextAlign; +import org.apache.fop.fo.properties.TextAlignLast; +import org.apache.fop.fo.properties.LeaderPattern; +import org.apache.fop.fo.properties.Hyphenate; +import org.apache.fop.fo.properties.CountryMaker; +import org.apache.fop.fo.properties.LanguageMaker; +import org.apache.fop.fo.properties.LeaderAlignment; +import org.apache.fop.layout.hyphenation.Hyphenation; +import org.apache.fop.layout.hyphenation.Hyphenator; +//java import java.util.Vector; import java.util.Enumeration; import java.awt.Rectangle; -import org.apache.fop.fo.properties.WrapOption; // for enumerated -// values -import org.apache.fop.fo.properties.WhiteSpaceCollapse; // for -// enumerated values -import org.apache.fop.fo.properties.TextAlign; // for enumerated -// values -import org.apache.fop.fo.properties.TextAlignLast; // for enumerated -// values -import org.apache.fop.fo.properties.LeaderPattern; -import org.apache.fop.fo.properties.LeaderAlignment; - -import org.apache.fop.datatypes.IDNode; public class LineArea extends Area { @@ -91,6 +93,14 @@ public class LineArea extends Area { private int wrapOption; private int whiteSpaceCollapse; + /*hyphenation*/ + protected int hyphenate; + protected char hyphenationChar; + protected int hyphenationPushCharacterCount; + protected int hyphenationRemainCharacterCount; + protected String language; + protected String country; + /* the width of text that has definitely made it into the line area */ protected int finalWidth = 0; @@ -183,10 +193,10 @@ public class LineArea extends Area { /** - * adds text to line area - * - * @return int character position - */ + * adds text to line area + * + * @return int character position + */ public int addText(char odata[], int start, int end, LinkSet ls, boolean ul) { boolean overrun = false; @@ -199,12 +209,6 @@ public class LineArea extends Area { data[count] = 
odata[count]; } - // added by hani 9/13/2000 to maintain my sanity - // and to prevent array index out of bounds. - if( start == -1 ) - return -1; - - /* iterate over each character */ for (int i = start; i < end; i++) { int charWidth; @@ -389,7 +393,11 @@ public class LineArea extends Area { return wordStart; } } else if (this.wrapOption == WrapOption.WRAP) { + if (this.hyphenate == Hyphenate.TRUE) { + return this.doHyphenation(data,i,wordStart,this.getContentWidth()-finalWidth-pendingWidth-spaceWidth); + } else { return wordStart; + } } } @@ -423,18 +431,19 @@ public class LineArea extends Area { } /** - * adds a Leader; actually the method receives the leader properties - * and creates a leader area or an inline area which is appended to - * the children of the containing line area. <br> - * leader pattern use-content is not implemented. - */ + * adds a Leader; actually the method receives the leader properties + * and creates a leader area or an inline area which is appended to + * the children of the containing line area. <br> + * leader pattern use-content is not implemented. + */ public void addLeader(int leaderPattern, int leaderLengthMinimum, int leaderLengthOptimum, int leaderLengthMaximum, int ruleStyle, int ruleThickness, int leaderPatternWidth, int leaderAlignment) { InlineArea leaderPatternArea; int leaderLength; - int remainingWidth = this.getContentWidth() - this.getCurrentXPosition(); + int remainingWidth = + this.getContentWidth() - this.getCurrentXPosition(); //here is the point to decide which leader-length is to be used, either //optimum or maximum. 
At the moment maximum is used if the remaining @@ -450,7 +459,8 @@ public class LineArea extends Area { //whitespace setting must be false for this int whiteSpaceSetting = this.whiteSpaceCollapse; this.changeWhiteSpaceCollapse(WhiteSpaceCollapse.FALSE); - pendingAreas.addElement(this.buildSimpleLeader(32,leaderLength)); + pendingAreas.addElement( + this.buildSimpleLeader(32, leaderLength)); this.changeWhiteSpaceCollapse(whiteSpaceSetting); break; case LeaderPattern.RULE: @@ -468,19 +478,20 @@ public class LineArea extends Area { } //if value of leader-pattern-width is 'use-font-metrics' (0) if (leaderPatternWidth == 0) { - pendingAreas.addElement(this.buildSimpleLeader(46,leaderLength)); + pendingAreas.addElement( + this.buildSimpleLeader(46, leaderLength)); } else { //if leader-alignment is used, calculate space to insert before leader //so that all dots will be parallel. if (leaderAlignment == LeaderAlignment.REFERENCE_AREA) { - int spaceBeforeLeader = - this.getLeaderAlignIndent(leaderLength, - leaderPatternWidth); + int spaceBeforeLeader = this.getLeaderAlignIndent( + leaderLength, leaderPatternWidth); //appending indent space leader-alignment //setting InlineSpace to false, so it is not used in line justification if (spaceBeforeLeader != 0) { pendingAreas.addElement( - new InlineSpace(spaceBeforeLeader,false)); + new InlineSpace(spaceBeforeLeader, + false)); pendingWidth += spaceBeforeLeader; //shorten leaderLength, otherwise - in case of //leaderLength=remaining length - it will cut off the end of @@ -493,12 +504,14 @@ public class LineArea extends Area { //inline area with this width InlineSpace spaceBetweenDots = new InlineSpace(leaderPatternWidth - - this.currentFontState.width(46),false); - leaderPatternArea = new InlineArea(currentFontState, this.red, - this.green, this.blue, new String ("."), - this.currentFontState.width(46)); - int dotsFactor = (int) Math.floor (((double) leaderLength )/ - ((double)leaderPatternWidth)); + 
this.currentFontState.width(46), false); + leaderPatternArea = + new InlineArea(currentFontState, this.red, + this.green, this.blue, new String ("."), + this.currentFontState.width(46)); + int dotsFactor = (int) Math.floor ( + ((double) leaderLength) / + ((double) leaderPatternWidth)); //add combination of dot + space to fill leader //is there a way to do this in a more effective way? @@ -513,8 +526,8 @@ public class LineArea extends Area { break; //leader pattern use-content not implemented. case LeaderPattern.USECONTENT: - MessageHandler.errorln("leader-pattern=\"use-content\" not " - + "supported by this version of Fop"); + MessageHandler.errorln( + "leader-pattern=\"use-content\" not " + "supported by this version of Fop"); return; } //adds leader length to length of pending inline areas @@ -525,20 +538,15 @@ public class LineArea extends Area { } /** - * adds pending inline areas to the line area - * normally done,if the line area is filled and - * added as child to the parent block area - */ + * adds pending inline areas to the line area + * normally done,if the line area is filled and + * added as child to the parent block area + */ public void addPending() { if (spaceWidth > 0) { - // by Dresdner Bank, Germany - // this should handle the correct amount of space after - // the text if there is no more text, important for right alignment - if(this.whiteSpaceCollapse == WhiteSpaceCollapse.FALSE || pendingAreas.size() > 0) { - addChild(new InlineSpace(spaceWidth)); - finalWidth += spaceWidth; - spaceWidth = 0; - } + addChild(new InlineSpace(spaceWidth)); + finalWidth += spaceWidth; + spaceWidth = 0; } Enumeration e = pendingAreas.elements(); @@ -555,9 +563,9 @@ public class LineArea extends Area { } /** - * aligns line area - * - */ + * aligns line area + * + */ public void align(int type) { int padding = 0; @@ -585,8 +593,8 @@ public class LineArea extends Area { if (b instanceof InlineSpace) { InlineSpace space = (InlineSpace) b; if (space.getResizeable()) { 
- spaceList.addElement(space); - spaceCount++; + spaceList.addElement(space); + spaceCount++; } } } @@ -660,32 +668,52 @@ public class LineArea extends Area { } /** - * creates a leader as String out of the given char and the leader length - * and wraps it in an InlineArea which is returned + * sets hyphenation related traits: language, country, hyphenate, hyphenation-character + * and minimum number of character to remain one the previous line and to be on the + * next line. */ - private InlineArea buildSimpleLeader(int charNumber,int leaderLength) { - int factor = (int) Math.floor (leaderLength / - this.currentFontState.width(charNumber)); - char [] leaderChars = new char [factor]; - char fillChar = (char) charNumber; - for (int i = 0; i < factor; i ++) { - leaderChars[i] = fillChar; - } - InlineArea leaderPatternArea = new InlineArea(currentFontState, this.red, - this.green, this.blue, new String (leaderChars), - leaderLength); - return leaderPatternArea; + public void changeHyphenation(String language, String country, + int hyphenate, char hyphenationChar, + int hyphenationPushCharacterCount, + int hyphenationRemainCharacterCount) { + this.language = language; + this.country = country; + this.hyphenate = hyphenate; + this.hyphenationChar = hyphenationChar; + this.hyphenationPushCharacterCount = hyphenationPushCharacterCount; + this.hyphenationRemainCharacterCount = + hyphenationRemainCharacterCount; + } + /** - * calculates the width of space which has to be inserted before the - * start of the leader, so that all leader characters are aligned. - * is used if property leader-align is set. At the moment only the value - * for leader-align="reference-area" is supported. 
- * - */ + * creates a leader as String out of the given char and the leader length + * and wraps it in an InlineArea which is returned + */ + private InlineArea buildSimpleLeader(int charNumber, int leaderLength) { + int factor = (int) Math.floor (leaderLength / + this.currentFontState.width(charNumber)); + char [] leaderChars = new char [factor]; + char fillChar = (char) charNumber; + for (int i = 0; i < factor; i ++) { + leaderChars[i] = fillChar; + } + InlineArea leaderPatternArea = + new InlineArea(currentFontState, this.red, this.green, + this.blue, new String (leaderChars), leaderLength); + return leaderPatternArea; + } + + /** + * calculates the width of space which has to be inserted before the + * start of the leader, so that all leader characters are aligned. + * is used if property leader-align is set. At the moment only the value + * for leader-align="reference-area" is supported. + * + */ private int getLeaderAlignIndent (int leaderLength, - int leaderPatternWidth ) { + int leaderPatternWidth) { //calculate position of used space in line area double position = getCurrentXPosition(); //calculate factor of next leader pattern cycle @@ -699,11 +727,81 @@ public class LineArea extends Area { } /** - * calculates the used space in this line area - */ + * calculates the used space in this line area + */ private int getCurrentXPosition() { - return finalWidth + spaceWidth + startIndent + pendingWidth; + return finalWidth + spaceWidth + startIndent + pendingWidth; + } + + /** + * extracts a complete word from the character data + */ + private String getHyphenationWord (char [] characters, int wordStart) { + boolean wordendFound = false; + int counter = 0; + char [] newWord = new char [100]; //create a buffer + while ((!wordendFound) && ((wordStart + counter) < characters.length)) { + char tk = characters[wordStart+counter]; + if (Character.isLetter(tk)) { + newWord[counter] = tk; + counter++; + } else { + wordendFound = true; + } + } + return new String 
(newWord,0,counter); } + private int doHyphenation (char [] characters, int position, int wordStart, int remainingWidth) { + int hyphCharWidth = this.currentFontState.width(this.hyphenationChar); + remainingWidth -= hyphCharWidth; + + String wordToHyphenate = getHyphenationWord(characters,wordStart); + //check whether the language property has been set + if (this.language.equalsIgnoreCase("none")) { + MessageHandler.errorln("if property 'hyphenate' is used, a language must be specified"); + return wordStart; + } + //are there any hyphenation points + Hyphenation hyph = Hyphenator.hyphenate(language,country,wordToHyphenate,hyphenationRemainCharacterCount,hyphenationPushCharacterCount); + if (hyph != null) { + int [] hyphenationPoints = hyph.getHyphenationPoints(); + + int index = 0; + String wordBegin = ""; + int wordBeginWidth = 0; + + while (wordBeginWidth < remainingWidth && hyph.length() > index) { + wordBegin = hyph.getPreHyphenText(index); + wordBeginWidth = getWordWidth(wordBegin); + index++; + } + if (index > 1) { + wordBegin = hyph.getPreHyphenText(index-1) + this.hyphenationChar; + wordBeginWidth = getWordWidth(wordBegin); + InlineArea hia = new InlineArea(currentFontState, + this.red, this.green, this.blue, + wordBegin,wordBegin.length()); + this.addChild(new InlineSpace(currentFontState.width(32))); + this.addChild(hia); + + //calculate the space needed + finalWidth += wordBeginWidth + currentFontState.width(32); + return wordStart + wordBegin.length()-1; + } + } + return wordStart; + } + + private int getWordWidth (String word) { + int wordLength = word.length(); + int width = 0; + char [] characters = new char [wordLength]; + word.getChars(0,wordLength,characters,0); + for (int i = 0; i < wordLength; i++) { + width += this.currentFontState.width(characters[i]); + } + return width; + } } diff --git a/src/org/apache/fop/layout/hyphenation/ByteVector.java b/src/org/apache/fop/layout/hyphenation/ByteVector.java new file mode 100644 index 
000000000..10b4cf2ad --- /dev/null +++ b/src/org/apache/fop/layout/hyphenation/ByteVector.java @@ -0,0 +1,158 @@ +/** -- $Id$ -- + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.io.Serializable; + +/** + * This class implements a simple byte vector with access to the + * underlying array. 
/**
 * This class implements a simple byte vector with access to the
 * underlying array.
 *
 * <p>The vector keeps a fill pointer ({@link #length()}) that is advanced
 * by {@link #alloc(int)}; {@link #put(int, byte)} and {@link #get(int)}
 * access the backing array directly and do not touch the fill pointer.</p>
 *
 * <p>The class is serializable, so the names of the instance fields are
 * part of its serialized form and are left unchanged.</p>
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class ByteVector implements Serializable
{
    /** Capacity increment size */
    private final static int DEFAULT_BLOCK_SIZE=2048;
    private int BLOCK_SIZE;

    /** The encapsulated array */
    private byte[] array;

    /** Points to next free item */
    private int n;

    /** Creates an empty vector with the default block size. */
    public ByteVector()
    {
        this(DEFAULT_BLOCK_SIZE);
    }

    /**
     * Creates an empty vector.
     * @param capacity initial capacity and growth increment; values
     *        that are not positive select the default block size
     */
    public ByteVector(int capacity)
    {
        if ( capacity > 0 )
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = new byte[BLOCK_SIZE];
        n = 0;
    }

    /**
     * Wraps an existing array (it is not copied). The fill pointer
     * starts at 0, i.e. the array content is treated as free space.
     * NOTE(review): CharVector's equivalent constructor starts at
     * a.length instead - confirm the difference is intended.
     * @param a the array to use as backing store
     */
    public ByteVector(byte[] a)
    {
        BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = 0;
    }

    /**
     * Wraps an existing array (it is not copied) with the fill pointer
     * at 0.
     * @param a the array to use as backing store
     * @param capacity growth increment; values that are not positive
     *        select the default block size
     */
    public ByteVector(byte[] a, int capacity)
    {
        if ( capacity > 0 )
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = 0;
    }

    /** @return the backing array itself, not a copy */
    public byte[] getArray()
    {
        return array;
    }

    /** return number of items in array */
    public int length()
    {
        return n;
    }

    /** returns current capacity of array */
    public int capacity()
    {
        return array.length;
    }

    /** Stores val at index in the backing array. */
    public void put(int index, byte val)
    {
        array[index] = val;
    }

    /** @return the byte stored at index in the backing array */
    public byte get(int index)
    {
        return array[index];
    }

    /**
     * This is to implement memory allocation in the array. Like malloc().
     *
     * <p>Reserves <code>size</code> consecutive items starting at the
     * current fill pointer and grows the backing array when needed.
     * The array normally grows by one block; if that is not enough for
     * the request it grows far enough to hold it (previously a request
     * larger than one block left the vector shorter than its length).</p>
     *
     * @param size number of items to reserve
     * @return index of the first reserved item
     */
    public int alloc(int size)
    {
        int index = n;
        int len = array.length;
        int required = n + size;
        if ( required >= len ) {
            // keep the usual one-block growth, but never end up with an
            // array that cannot hold the reserved range
            int newLen = Math.max(len + BLOCK_SIZE, required + 1);
            byte[] aux = new byte[newLen];
            System.arraycopy(array, 0, aux, 0, len);
            array = aux;
        }
        n = required;
        return index;
    }

    /** Shrinks the backing array to the number of reserved items. */
    public void trimToSize()
    {
        if ( n < array.length ) {
            byte[] aux = new byte[n];
            System.arraycopy(array, 0, aux, 0, n);
            array = aux;
        }
    }

}
For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.io.Serializable; + +/** + * This class implements a simple char vector with access to the + * underlying array. 
/**
 * This class implements a simple char vector with access to the
 * underlying array.
 *
 * <p>The vector keeps a fill pointer ({@link #length()}) that is advanced
 * by {@link #alloc(int)}; {@link #put(int, char)} and {@link #get(int)}
 * access the backing array directly and do not touch the fill pointer.</p>
 *
 * <p>The class is serializable, so the names of the instance fields are
 * part of its serialized form and are left unchanged.</p>
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class CharVector implements Cloneable, Serializable
{
    /** Capacity increment size */
    private final static int DEFAULT_BLOCK_SIZE=2048;
    private int BLOCK_SIZE;

    /** The encapsulated array */
    private char[] array;

    /** Points to next free item */
    private int n;

    /** Creates an empty vector with the default block size. */
    public CharVector()
    {
        this(DEFAULT_BLOCK_SIZE);
    }

    /**
     * Creates an empty vector.
     * @param capacity initial capacity and growth increment; values
     *        that are not positive select the default block size
     */
    public CharVector(int capacity)
    {
        if ( capacity > 0 )
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = new char[BLOCK_SIZE];
        n = 0;
    }

    /**
     * Wraps an existing array (it is not copied); the whole array counts
     * as used, so the fill pointer starts at a.length.
     * @param a the array to use as backing store
     */
    public CharVector(char[] a)
    {
        BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.length;
    }

    /**
     * Wraps an existing array (it is not copied); the whole array counts
     * as used.
     * @param a the array to use as backing store
     * @param capacity growth increment; values that are not positive
     *        select the default block size
     */
    public CharVector(char[] a, int capacity)
    {
        if ( capacity > 0 )
            BLOCK_SIZE = capacity;
        else
            BLOCK_SIZE = DEFAULT_BLOCK_SIZE;
        array = a;
        n = a.length;
    }

    /** Reset Vector but don't resize or clear elements */
    public void clear()
    {
        n = 0;
    }

    /**
     * @return an independent CharVector with a copy of the backing
     *         array, the same block size and the same fill pointer
     */
    public Object clone()
    {
        CharVector cv = new CharVector((char[])array.clone(), BLOCK_SIZE);
        cv.n = this.n;
        return cv;
    }

    /** @return the backing array itself, not a copy */
    public char[] getArray()
    {
        return array;
    }

    /** return number of items in array */
    public int length()
    {
        return n;
    }

    /** returns current capacity of array */
    public int capacity()
    {
        return array.length;
    }

    /** Stores val at index in the backing array. */
    public void put(int index, char val)
    {
        array[index] = val;
    }

    /** @return the char stored at index in the backing array */
    public char get(int index)
    {
        return array[index];
    }

    /**
     * Reserves <code>size</code> consecutive items starting at the
     * current fill pointer and grows the backing array when needed.
     * The array normally grows by one block; if that is not enough for
     * the request it grows far enough to hold it (previously a request
     * larger than one block left the vector shorter than its length).
     *
     * @param size number of items to reserve
     * @return index of the first reserved item
     */
    public int alloc(int size)
    {
        int index = n;
        int len = array.length;
        int required = n + size;
        if ( required >= len ) {
            // keep the usual one-block growth, but never end up with an
            // array that cannot hold the reserved range
            int newLen = Math.max(len + BLOCK_SIZE, required + 1);
            char[] aux = new char[newLen];
            System.arraycopy(array, 0, aux, 0, len);
            array = aux;
        }
        n = required;
        return index;
    }

    /** Shrinks the backing array to the number of reserved items. */
    public void trimToSize()
    {
        if ( n < array.length ) {
            char[] aux = new char[n];
            System.arraycopy(array, 0, aux, 0, n);
            array = aux;
        }
    }
}
000000000..8d19b4ed2 --- /dev/null +++ b/src/org/apache/fop/layout/hyphenation/Hyphen.java @@ -0,0 +1,101 @@ +/** -- $Id$ -- + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; +import java.io.Serializable; + +/** + * This class represents a hyphen. A 'full' hyphen is made of 3 parts: + * the pre-break text, post-break text and no-break. If no line-break + * is generated at this position, the no-break text is used, otherwise, + * pre-break and post-break are used. Typically, pre-break is equal to + * the hyphen character and the others are empty. However, this general + * scheme allows support for cases in some languages where words change + * spelling if they're split across lines, like german's 'backen' which + * hyphenates 'bak-ken'. BTW, this comes from TeX. 
/**
 * A hyphen as used in hyphenation exceptions. It consists of up to
 * three texts: the text placed before a line break, the text placed
 * after it, and the text used when no break happens at this position.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class Hyphen implements Serializable
{
    /** text inserted before the line break */
    public String preBreak;
    /** text used when the line is not broken here */
    public String noBreak;
    /** text inserted after the line break */
    public String postBreak;

    /**
     * @param pre pre-break text
     * @param no no-break text
     * @param post post-break text
     */
    Hyphen(String pre, String no, String post)
    {
        this.preBreak = pre;
        this.noBreak = no;
        this.postBreak = post;
    }

    /**
     * Creates a hyphen that only has a pre-break text.
     * @param pre pre-break text
     */
    Hyphen(String pre)
    {
        this(pre, null, null);
    }

    /**
     * @return "-" for a hyphen whose pre-break text is "-" and whose
     *         other texts are null; otherwise the three texts in the
     *         order pre-break, post-break, no-break, each in braces
     */
    public String toString()
    {
        boolean plainHyphen = "-".equals(preBreak)
                              && noBreak == null && postBreak == null;
        if ( plainHyphen ) {
            return "-";
        }
        return "{" + preBreak + "}{" + postBreak + "}{" + noBreak + "}";
    }
}
The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.util.Vector; + +/** + * This class represents a hyphenated word. 
/**
 * This class represents a hyphenated word: the word itself together
 * with the character offsets at which it may be broken.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class Hyphenation {
    /** offsets into {@link #word} where a break is possible */
    int[] hyphenPoints;
    /** the word the offsets refer to */
    String word;

    /** number of hyphenation points in word */
    int len;

    /**
     * @param word the word
     * @param points offsets into word where it may be hyphenated; the
     *        array is stored as is, not copied
     */
    Hyphenation(String word, int[] points) {
        this.hyphenPoints = points;
        this.len = points.length;
        this.word = word;
    }

    /** @return the number of hyphenation points in the word */
    public int length() {
        return len;
    }

    /** @return the pre-break text, not including the hyphen character */
    public String getPreHyphenText(int index) {
        int breakAt = hyphenPoints[index];
        return word.substring(0, breakAt);
    }

    /** @return the post-break text */
    public String getPostHyphenText(int index) {
        int breakAt = hyphenPoints[index];
        return word.substring(breakAt);
    }

    /** @return the hyphenation points */
    public int [] getHyphenationPoints() {
        return hyphenPoints;
    }

    /** @return the word with a "-" inserted at every hyphenation point */
    public String toString() {
        StringBuffer out = new StringBuffer();
        int from = 0;
        int i = 0;
        while (i < len) {
            int to = hyphenPoints[i++];
            out.append(word.substring(from, to)).append('-');
            from = to;
        }
        return out.append(word.substring(from)).toString();
    }
}
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/**
 * Checked exception of the hyphenation package; it only carries a
 * message.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public class HyphenationException extends Exception
{
    /**
     * @param msg the detail message, available through getMessage()
     */
    public HyphenationException(String msg) {
        super(msg);
    }
}
The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.io.*; +import java.util.Vector; +import java.util.Hashtable; + +/** + * This tree structure stores the hyphenation patterns in an efficient + * way for fast lookup. It provides the provides the method to + * hyphenate a word. 
+ * + * @author Carlos Villegas <cav@uniscope.co.jp> + */ +public class HyphenationTree extends TernaryTree + implements PatternConsumer, Serializable +{ + + /** value space: stores the inteletter values */ + protected ByteVector vspace; + /** This map stores hyphenation exceptions */ + protected Hashtable stoplist; + /** This map stores the character classes */ + protected TernaryTree classmap; + /** Temporary map to store interletter values on pattern loading. */ + private transient TernaryTree ivalues; + + public HyphenationTree() + { + stoplist = new Hashtable(23); // usually a small table + classmap = new TernaryTree(); + vspace = new ByteVector(); + vspace.alloc(1); // this reserves index 0, which we don't use + } + + /** + * Packs the values by storing them in 4 bits, two values into a byte + * Values range is from 0 to 9. We use zero as terminator, + * so we'll add 1 to the value. + * @param values a string of digits from '0' to '9' representing the + * interletter values. + * @return the index into the vspace array where the packed values + * are stored. + */ + protected int packValues(String values) + { + int i, n = values.length(); + int m = (n & 1) == 1 ? (n>>1)+2 : (n>>1)+1; + int offset = vspace.alloc(m); + byte[] va = vspace.getArray(); + for(i=0; i<n; i++) { + int j = i>>1; + byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f); + if ( (i&1) == 1 ) + va[j+offset] = (byte)( va[j+offset] | v ); + else + va[j+offset] = (byte)(v << 4); // big endian + } + va[m-1+offset] = 0; // terminator + return offset; + } + + protected String unpackValues(int k) + { + StringBuffer buf = new StringBuffer(); + byte v = vspace.get(k++); + while( v != 0 ) { + char c = (char)((v >>> 4) -1 + '0'); + buf.append(c); + c = (char)(v & 0x0f); + if ( c == 0 ) break; + c = (char)(c - 1 + '0'); + buf.append(c); + v = vspace.get(k++); + } + return buf.toString(); + } + + /** + * Read hyphenation patterns from an XML file. 
+ */ + public void loadPatterns(String filename) + throws HyphenationException + { + PatternParser pp = new PatternParser(this); + ivalues = new TernaryTree(); + + pp.parse(filename); + + // patterns/values should be now in the tree + // let's optimize a bit + trimToSize(); + vspace.trimToSize(); + classmap.trimToSize(); + + // get rid of the auxiliary map + ivalues = null; + } + + public String findPattern(String pat) + { + int k = super.find(pat); + if ( k >= 0 ) + return unpackValues(k); + return ""; + } + + /** + * String compare, returns 0 if equal or + * t is a substring of s + */ + protected int hstrcmp(char[] s, int si, char[] t, int ti) + { + for ( ; s[si] == t[ti]; si++, ti++) + if (s[si] == 0) + return 0; + if ( t[ti] == 0 ) + return 0; + return s[si] - t[ti]; + } + + protected byte[] getValues(int k) + { + StringBuffer buf = new StringBuffer(); + byte v = vspace.get(k++); + while( v != 0 ) { + char c = (char)((v >>> 4) - 1); + buf.append(c); + c = (char)(v & 0x0f); + if ( c == 0 ) break; + c = (char)(c - 1); + buf.append(c); + v = vspace.get(k++); + } + byte[] res = new byte[buf.length()]; + for(int i=0;i<res.length;i++) + res[i] = (byte)buf.charAt(i); + return res; + } + + /** + * <p>Search for all possible partial matches of word starting + * at index an update interletter values. In other words, it + * does something like:</p> + * <code> + * for(i=0; i<patterns.length; i++) { + * if ( word.substring(index).startsWidth(patterns[i]) ) + * update_interletter_values(patterns[i]); + * } + * </code> + * <p>But it is done in an efficient way since the patterns are + * stored in a ternary tree. In fact, this is the whole purpose + * of having the tree: doing this search without having to test + * every single pattern. The number of patterns for languages + * such as English range from 4000 to 10000. Thus, doing thousands + * of string comparisons for each word to hyphenate would be + * really slow without the tree. 
The tradeoff is memory, but + * using a ternary tree instead of a trie, almost halves the + * the memory used by Lout or TeX. It's also faster than using + * a hash table</p> + * @param word null terminated word to match + * @param index start index from word + * @param il interletter values array to update + */ + protected void searchPatterns(char[] word, int index, byte[] il) + { + byte[] values; + int i=index; + char p, q; + char sp = word[i]; + p = root; + + while( p != 0 ) { + if (sc[p] == 0xFFFF) { + if ( hstrcmp(word, i, kv.getArray(), lo[p]) == 0 ) { + values = getValues(eq[p]); // data pointer is in eq[] + int j=index; + for(int k=0; k<values.length; k++) { + if ( values[k] > il[j] ) + il[j] = values[k]; + j++; + } + } + return; + } + int d = sp - sc[p]; + if ( d == 0 ) { + if ( sp == 0 ) + break; + sp = word[++i]; + p = eq[p]; + q = p; + + // look for a pattern ending at this position by searching for + // the null char ( splitchar == 0 ) + while ( q != 0 ) { + if ( sc[q] == 0xFFFF ) // stop at compressed branch + break; + if ( sc[q] == 0 ) { + values = getValues(eq[q]); + int j=index; + for(int k=0; k<values.length; k++) { + if ( values[k] > il[j] ) + il[j] = values[k]; + j++; + } + break; + } else { + q = lo[q]; + /** actually the code should be: + q = sc[q] < 0 ? hi[q] : lo[q]; + but java chars are unsigned + */ + } + } + } else + p = d < 0 ? lo[p] : hi[p]; + } + } + + /** + * Hyphenate word and return a Hyphenation object. + * @param word the word to be hyphenated + * @param remainCharCount Minimum number of characters allowed + * before the hyphenation point. + * @param pushCharCount Minimum number of characters allowed after + * the hyphenation point. + * @return a {@link Hyphenation Hyphenation} object representing + * the hyphenated word or null if word is not hyphenated. 
+ */ + public Hyphenation hyphenate(String word, int remainCharCount, int pushCharCount) + { + char[] w = word.toCharArray(); + return hyphenate(w, 0, w.length, remainCharCount, pushCharCount); + } + + /** + * Hyphenate word and return an array of hyphenation points. + * @param w char array that contains the word + * @param offset Offset to first character in word + * @param len Length of word + * @param remainCharCount Minimum number of characters allowed + * before the hyphenation point. + * @param pushCharCount Minimum number of characters allowed after + * the hyphenation point. + * @return a {@link Hyphenation Hyphenation} object representing + * the hyphenated word or null if word is not hyphenated. + */ + public Hyphenation hyphenate(char[] w, int offset, int len, + int remainCharCount, int pushCharCount) + { + int i; + char[] word = new char[len+3]; + + // normalize word + char[] c = new char[2]; + for(i=1; i<=len; i++) { + c[0] = w[offset+i-1]; + int nc = classmap.find(c,0); + if ( nc < 0 ) { // found a non-letter character, abort + return null; + } + word[i] = (char)nc; + } + int[] result = new int[len+1]; + int k=0; + + // check exception list first + String sw = new String(word,1,len); + if ( stoplist.containsKey(sw) ) { + // assume only simple hyphens (Hyphen.pre="-", Hyphen.post = Hyphen.no = null) + Vector hw = (Vector)stoplist.get(sw); + int j = 0; + for(i=0; i<hw.size(); i++) { + Object o = hw.elementAt(i); + if ( o instanceof String ) { + j += ((String)o).length(); + if ( j >= remainCharCount && j < (len - pushCharCount) ) + result[k++] = j; + } + } + } else { + + // use algorithm to get hyphenation points + word[0] = '.'; // word start marker + word[len+1] = '.'; // word end marker + word[len+2] = 0; // null terminated + byte[] il = new byte[len+3]; // initialized to zero + for(i=0; i<len+1; i++) + searchPatterns(word, i, il); + + // hyphenation points are located where interletter value is odd + for(i=0; i<len; i++) { + if ( ((il[i+1] & 1) == 1) 
&& i >= remainCharCount + && i < (len-pushCharCount) ) { + result[k++] = i; + } + } + } + + if ( k > 0 ) { + // trim result array + int[] res = new int[k]; + System.arraycopy(result, 0, res, 0, k); + return new Hyphenation(new String(w,offset,len), res); + } else { + return null; + } + } + + /** + * Add a character class to the tree. It is used by + * {@link PatternParser PatternParser} as callback to + * add character classes. Character classes define the + * valid word characters for hyphenation. If a word contains + * a character not defined in any of the classes, it is not hyphenated. + * It also defines a way to normalize the characters in order + * to compare them with the stored patterns. Usually pattern + * files use only lower case characters, in this case a class + * for letter 'a', for example, should be defined as "aA", the first + * character being the normalization char. + */ + public void addClass(String chargroup) + { + if ( chargroup.length() > 0 ) { + char equivChar = chargroup.charAt(0); + char[] key = new char[2]; + key[1] = 0; + for(int i=0; i<chargroup.length(); i++ ) { + key[0] = chargroup.charAt(i); + classmap.insert(key, 0, equivChar); + } + } + } + + /** + * Add an exception to the tree. It is used by + * {@link PatternParser PatternParser} class as callback to + * store the hyphenation exceptions. + * @param word normalized word + * @param hyphenatedword a vector of alternating strings and + * {@link Hyphen hyphen} objects. + */ + public void addException(String word, Vector hyphenatedword) + { + stoplist.put(word, hyphenatedword); + } + + /** + * Add a pattern to the tree. Mainly, to be used by + * {@link PatternParser PatternParser} class as callback to + * add a pattern to the tree. + * @param pattern the hyphenation pattern + * @param ivalue interletter weight values indicating the + * desirability and priority of hyphenating at a given point + * within the pattern. It should contain only digit characters. + * (i.e. '0' to '9'). 
+ */ + public void addPattern(String pattern, String ivalue) + { + int k = ivalues.find(ivalue); + if ( k <= 0 ) { + k = packValues(ivalue); + ivalues.insert(ivalue, (char)k); + } + insert(pattern, (char)k); + } + + public void printStats() + { + System.out.println("Value space size = " + Integer.toString(vspace.length())); + super.printStats(); + + } + + public static void main(String[] argv) + throws Exception + { + HyphenationTree ht = null; + int minCharCount = 2; + BufferedReader in + = new BufferedReader(new InputStreamReader(System.in)); + for(;;) { + System.out.print("l:\tload patterns from XML\nL:\tload patterns from serialized object\ns:\tset minimun character count\nw:\twrite hyphenation tree to object file\nh:\thyphenate\nf:\tfind pattern\nb:\tbenchmark\nq:\tquit\n\nCommand:"); + String token = in.readLine().trim(); + if ( token.equals("f") ) { + System.out.print("Pattern: "); + token = in.readLine().trim(); + System.out.println("Values: " + ht.findPattern(token)); + } else if ( token.equals("s")) { + System.out.print("Minimun value: " ); + token = in.readLine().trim(); + minCharCount = Integer.parseInt(token); + } else if ( token.equals("l") ) { + ht = new HyphenationTree(); + System.out.print("XML file name: "); + token = in.readLine().trim(); + ht.loadPatterns(token); + } else if ( token.equals("L") ) { + ObjectInputStream ois = null; + System.out.print("Object file name: "); + token = in.readLine().trim(); + try { + ois = new ObjectInputStream(new FileInputStream(token)); + ht = (HyphenationTree)ois.readObject(); + } + catch (Exception e) { + e.printStackTrace(); + } + finally { + if ( ois != null ) { + try { ois.close(); } + catch (IOException e) { } + } + } + } else if ( token.equals("w") ) { + System.out.print("Object file name: "); + token = in.readLine().trim(); + ObjectOutputStream oos = null; + try { + oos = new ObjectOutputStream(new FileOutputStream(token)); + oos.writeObject(ht); + } + catch (Exception e) { + e.printStackTrace(); + } + 
finally { + if ( oos != null ) { + try { oos.flush(); } + catch (IOException e) {} + try { oos.close(); } + catch (IOException e) {} + } + } + } else if ( token.equals("h") ) { + System.out.print("Word: "); + token = in.readLine().trim(); + System.out.print("Hyphenation points: "); + System.out.println(ht.hyphenate(token,minCharCount,minCharCount)); + } else if ( token.equals("b") ) { + if ( ht == null ) { + System.out.println("No patterns has been loaded."); + break; + } + System.out.print("Word list filename: "); + token = in.readLine().trim(); + long starttime = 0; + int counter = 0;; + try { + BufferedReader reader = new BufferedReader ( new FileReader(token)); + String line; + + starttime = System.currentTimeMillis(); + while ((line = reader.readLine())!= null) { + //System.out.print("\nline: "); + Hyphenation hyp = ht.hyphenate(line,minCharCount,minCharCount); + if (hyp != null) { + String hword = hyp.toString(); + //System.out.println(line); + //System.out.println(hword); + } else { + //System.out.println("No hyphenation"); + } + counter++; + } + } catch (Exception ioe) { + System.out.println("Exception " + ioe); + ioe.printStackTrace(); + } + long endtime = System.currentTimeMillis(); + long result = endtime - starttime; + System.out.println(counter+ " words in " + result + " Millisekunden hyphenated"); + + } else if ( token.equals("q") ) + break; + } + + } +} diff --git a/src/org/apache/fop/layout/hyphenation/Hyphenator.java b/src/org/apache/fop/layout/hyphenation/Hyphenator.java new file mode 100644 index 000000000..73231724d --- /dev/null +++ b/src/org/apache/fop/layout/hyphenation/Hyphenator.java @@ -0,0 +1,266 @@ +/** -- $Id$ -- + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.io.*; +import java.util.Hashtable; +import org.apache.fop.configuration.*; +import org.apache.fop.messaging.MessageHandler; + +/** + * This class is the main entry point to the hyphenation package. + * You can use only the static methods or create an instance. 
+ * + * @author Carlos Villegas <cav@uniscope.co.jp> + */ +public class Hyphenator { + static Hashtable hyphenTrees = new Hashtable(); + + private HyphenationTree hyphenTree = null; + private int remainCharCount = 2; + private int pushCharCount = 2; + private static boolean errorDump = false; + + public Hyphenator(String lang, String country, int leftMin, + int rightMin) { + hyphenTree = getHyphenationTree(lang, country); + remainCharCount = leftMin; + pushCharCount = rightMin; + } + + public static HyphenationTree getHyphenationTree(String lang, + String country) { + String key = lang; + //check whether the country code has been used + if (country != null && !country.equals("none")) + key += "_" + country; + // first try to find it in the cache + if (hyphenTrees.containsKey(key)) + return (HyphenationTree) hyphenTrees.get(key); + if (hyphenTrees.containsKey(lang)) + return (HyphenationTree) hyphenTrees.get(lang); + + HyphenationTree hTree = getFopHyphenationTree(key); + if (hTree == null) { + String hyphenDir = StandardConfiguration.getStringValue("hyphenation-dir"); + if (hyphenDir != null){ + hTree = getUserHyphenationTree(key,hyphenDir); + } + } + //put it into the pattern cache + if (hTree != null) { + hyphenTrees.put(key, hTree); + } else { + MessageHandler.errorln("Couldn't find hyphenation pattern " + key); + } + return hTree; + } + + public static HyphenationTree getFopHyphenationTree (String key) { + HyphenationTree hTree = null; + ObjectInputStream ois = null; + InputStream is = null; + try { + is = Hyphenator.class.getResourceAsStream("/hyph/" + key + ".hyp"); + if (is == null) { + if (key.length() == 5) { + is = Hyphenator.class.getResourceAsStream("/hyph/" + + key.substring(0,2) + ".hyp"); + if (is != null) { + MessageHandler.errorln( + "Couldn't find hyphenation pattern " + key + + "\nusing general language pattern " + + key.substring(0,2) + " instead."); + } else { + if (errorDump){ + MessageHandler.errorln("Couldn't find precompiled " + + "fop 
hyphenation pattern " + key + ".hyp"); + } + return null; + } + } else { + if (errorDump){ + MessageHandler.errorln("Couldn't find precompiled " + + "fop hyphenation pattern " + key + ".hyp"); + } + return null; + } + } + ois = new ObjectInputStream(is); + hTree = (HyphenationTree) ois.readObject(); + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (ois != null) { + try { + ois.close(); + } catch (IOException e) { + MessageHandler.errorln("can't close hyphenation object stream"); + } + } + } + return hTree; + } + + /** + * load tree from serialized file or xml file + * using configuration settings + */ + public static HyphenationTree getUserHyphenationTree (String key,String hyphenDir) { + HyphenationTree hTree = null; + // I use here the following convention. The file name specified in + // the configuration is taken as the base name. First we try + // name + ".hyp" assuming a serialized HyphenationTree. If that fails + // we try name + ".xml", assumming a raw hyphenation pattern file. 
+ + // first try serialized object + File hyphenFile = new File(hyphenDir, key + ".hyp"); + if (hyphenFile.exists()) { + ObjectInputStream ois = null; + try { + ois = new ObjectInputStream( + new FileInputStream(hyphenFile)); + hTree = (HyphenationTree) ois.readObject(); + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (ois != null) { + try { + ois.close(); + } catch (IOException e) { + } + } + } + return hTree; + } else { + + // try the raw XML file + hyphenFile = new File(hyphenDir, key + ".xml"); + if (hyphenFile.exists()) { + hTree = new HyphenationTree(); + if (errorDump) { + MessageHandler.errorln("reading " + + hyphenDir + key +".xml"); + } + try { + hTree.loadPatterns(hyphenFile.getPath()); + if (errorDump) { + System.out.println("Stats: "); + hTree.printStats(); + } + return hTree; + } catch (HyphenationException ex) { + if (errorDump) { + MessageHandler.errorln("Can't load user patterns " + + "from xml file " + hyphenDir + key +".xml"); + } + return null; + } + } else { + if (errorDump) { + MessageHandler.errorln("Tried to load " + + hyphenFile.toString() + + "\nCannot find compiled nor xml file for " + + "hyphenation pattern" + key ); + } + return null; + } + } + } + + public static Hyphenation hyphenate(String lang, String country, + String word, int leftMin, int rightMin) { + HyphenationTree hTree = getHyphenationTree(lang, country); + if (hTree == null) { + MessageHandler.errorln( + "Error building hyphenation tree for language " + lang); + return null; + } + return hTree.hyphenate(word, leftMin, rightMin); + } + + public static Hyphenation hyphenate(String lang, String country, + char[] word, int offset, int len, int leftMin, int rightMin) { + HyphenationTree hTree = getHyphenationTree(lang, country); + if (hTree == null) { + MessageHandler.errorln( + "Error building hyphenation tree for language " + lang); + return null; + } + return hTree.hyphenate(word, offset, len, leftMin, rightMin); + } + + public void 
setMinRemainCharCount(int min) { + remainCharCount = min; + } + + public void setMinPushCharCount(int min) { + pushCharCount = min; + } + + public void setLanguage(String lang, String country) { + hyphenTree = getHyphenationTree(lang, country); + } + + public Hyphenation hyphenate(char[] word, int offset, int len) { + if (hyphenTree == null) + return null; + return hyphenTree.hyphenate(word, offset, len, remainCharCount, + pushCharCount); + } + + public Hyphenation hyphenate(String word) { + if (hyphenTree == null) + return null; + return hyphenTree.hyphenate(word, remainCharCount, pushCharCount); + } +} diff --git a/src/org/apache/fop/layout/hyphenation/PatternConsumer.java b/src/org/apache/fop/layout/hyphenation/PatternConsumer.java new file mode 100644 index 000000000..70bc04592 --- /dev/null +++ b/src/org/apache/fop/layout/hyphenation/PatternConsumer.java @@ -0,0 +1,85 @@ +/** -- $Id$ -- + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." 
/*
 Alternately, this acknowledgment may appear in the software itself, if
 and wherever such third-party acknowledgments normally appear.

 4. The names "Fop" and "Apache Software Foundation" must not be used to
 endorse or promote products derived from this software without prior
 written permission. For written permission, please contact
 apache@apache.org.

 5. Products derived from this software may not be called "Apache", nor may
 "Apache" appear in their name, without prior written permission of the
 Apache Software Foundation.

 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
 DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 This software consists of voluntary contributions made by many individuals
 on behalf of the Apache Software Foundation and was originally created by
 James Tauber <jtauber@jtauber.com>. For more information on the Apache
 Software Foundation, please see <http://www.apache.org/>.

 */

package org.apache.fop.layout.hyphenation;
import java.util.Vector;

/**
 * Callback interface through which the XML pattern file parser hands
 * its results to the hyphenation tree.
 *
 * @author Carlos Villegas <cav@uniscope.co.jp>
 */
public interface PatternConsumer {

    /**
     * Add a character class.
     * All characters of a class are treated as equivalent for the
     * purpose of hyphenation (e.g. "aA"), which usually amounts to
     * ignoring case.
     * @param chargroup the characters that form one class
     */
    public void addClass(String chargroup);

    /**
     * Add a hyphenation exception. An exception takes the place of the
     * result of the algorithm, either where the algorithm fails or where
     * the user wants to supply his own hyphenation.
     * @param word the exception word
     * @param hyphenatedword a vector of alternating String's and
     * {@link Hyphen Hyphen} instances
     */
    public void addException(String word, Vector hyphenatedword);

    /**
     * Add hyphenation patterns.
     * @param pattern the pattern
     * @param values interletter values expressed as a string of
     * digit characters.
     */
    public void addPattern(String pattern, String values);

}

/*
 * Retained verbatim from the original lines (start of the next file in the diff):
 *
 * diff --git a/src/org/apache/fop/layout/hyphenation/PatternParser.java b/src/org/apache/fop/layout/hyphenation/PatternParser.java
 * new file mode 100644
 * index 000000000..96495b0b8
 * --- /dev/null
 * +++ b/src/org/apache/fop/layout/hyphenation/PatternParser.java
 * @@ -0,0 +1,450 @@
 *
 * -- $Id$ --
 *
 * ============================================================================
 * The Apache Software License, Version 1.1
 * ============================================================================
 *
 * Copyright (C) 1999 The Apache Software Foundation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3.
 */
The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. 
+ + */ + +package org.apache.fop.layout.hyphenation; + +// SAX +import org.xml.sax.XMLReader; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.Attributes; + +// Java +import java.io.FileReader; +import java.io.File; +import java.io.FileWriter; +import java.io.PrintWriter; +import java.io.IOException; +import java.io.FileNotFoundException; +import java.util.Vector; +import java.net.URL; + +/** + * A SAX document handler to read and parse hyphenation patterns + * from a XML file. + * + * @author Carlos Villegas <cav@uniscope.co.jp> + */ +public class PatternParser extends DefaultHandler implements PatternConsumer { + + XMLReader parser; + int currElement; + PatternConsumer consumer; + StringBuffer token; + Vector exception; + char hyphenChar; + String errMsg; + + static final int ELEM_CLASSES = 1; + static final int ELEM_EXCEPTIONS = 2; + static final int ELEM_PATTERNS = 3; + static final int ELEM_HYPHEN = 4; + + public PatternParser() + throws HyphenationException + { + token = new StringBuffer(); + parser = createParser(); + parser.setContentHandler(this); + parser.setErrorHandler(this); + hyphenChar = '-'; // default + + } + + public PatternParser(PatternConsumer consumer) + throws HyphenationException + { + this(); + this.consumer = consumer; + } + + public void setConsumer(PatternConsumer consumer) + { + this.consumer = consumer; + } + + public void parse(String filename) + throws HyphenationException + { + InputSource uri = fileInputSource(filename); + + try { + parser.parse(uri); + } catch ( SAXException e ) { + throw new HyphenationException(errMsg); + } catch ( IOException e ) { + throw new HyphenationException(e.getMessage()); + } catch ( NullPointerException e) { + throw new HyphenationException("SAX parser not available"); + } + } + + /** + * creates a SAX parser, using the value of org.xml.sax.parser + * defaulting to 
org.apache.xerces.parsers.SAXParser + * + * @return the created SAX parser + */ + static XMLReader createParser() + throws HyphenationException + { + String parserClassName = System.getProperty("org.xml.sax.parser"); + if (parserClassName == null) { + parserClassName = "org.apache.xerces.parsers.SAXParser"; + } + // System.out.println("using SAX parser " + parserClassName); + + try { + return (XMLReader) + Class.forName(parserClassName).newInstance(); + } catch (ClassNotFoundException e) { + throw new HyphenationException("Could not find " + parserClassName); + } catch (InstantiationException e) { + throw new HyphenationException("Could not instantiate " + parserClassName); + } catch (IllegalAccessException e) { + throw new HyphenationException("Could not access " + parserClassName); + } catch (ClassCastException e) { + throw new HyphenationException(parserClassName + " is not a SAX driver"); + } + } + + /** + * create an InputSource from a file name + * + * @param filename the name of the file + * @return the InputSource created + */ + protected static InputSource fileInputSource(String filename) + throws HyphenationException + { + + /* this code adapted from James Clark's in XT */ + File file = new File(filename); + String path = file.getAbsolutePath(); + String fSep = System.getProperty("file.separator"); + if (fSep != null && fSep.length() == 1) + path = path.replace(fSep.charAt(0), '/'); + if (path.length() > 0 && path.charAt(0) != '/') + path = '/' + path; + try { + return new InputSource(new URL("file", null, path).toString()); + } + catch (java.net.MalformedURLException e) { + throw new HyphenationException("unexpected MalformedURLException"); + } + } + + protected String readToken(StringBuffer chars) + { + String word; + boolean space = false; + int i; + for(i=0; i<chars.length(); i++) + if ( Character.isWhitespace(chars.charAt(i)) ) + space = true; + else + break; + if ( space ) { + chars.delete(0,i); + if ( token.length() > 0 ) { + word = 
token.toString(); + token.setLength(0); + return word; + } + } + space = false; + for(i=0; i<chars.length(); i++) { + if ( Character.isWhitespace(chars.charAt(i)) ) { + space = true; + break; + } + } + token.append(chars.substring(0,i)); + chars.delete(0,i); + if ( space ) { + word = token.toString(); + token.setLength(0); + return word; + } + token.append(chars); + return null; + } + + protected static String getPattern(String word) + { + StringBuffer pat = new StringBuffer(); + int len = word.length(); + for(int i=0; i<len; i++) + if ( ! Character.isDigit(word.charAt(i)) ) + pat.append(word.charAt(i)); + return pat.toString(); + } + + protected Vector normalizeException(Vector ex) + { + Vector res = new Vector(); + for(int i=0; i<ex.size(); i++) { + Object item = ex.elementAt(i); + if ( item instanceof String ) { + String str = (String)item; + StringBuffer buf = new StringBuffer(); + for(int j=0; j<str.length(); j++) { + char c = str.charAt(j); + if ( c != hyphenChar ) + buf.append(c); + else { + res.addElement(buf.toString()); + buf.setLength(0); + char[] h = new char[1]; + h[0] = hyphenChar; + // we use here hyphenChar which is not necessarily + // the one to be printed + res.addElement(new Hyphen(new String(h),null,null)); + } + } + if ( buf.length() > 0 ) + res.addElement(buf.toString()); + } else + res.addElement(item); + } + return res; + } + + protected String getExceptionWord(Vector ex) + { + StringBuffer res = new StringBuffer(); + for(int i=0; i<ex.size(); i++) { + Object item = ex.elementAt(i); + if ( item instanceof String ) + res.append((String)item); + else { + if ( ((Hyphen)item).noBreak != null ) + res.append(((Hyphen)item).noBreak); + } + } + return res.toString(); + } + + protected static String getInterletterValues(String pat) + { + StringBuffer il = new StringBuffer(); + String word = pat + "a"; // add dummy letter to serve as sentinel + int len = word.length(); + for(int i=0;i<len;i++) { + char c = word.charAt(i); + if ( Character.isDigit(c) 
) { + il.append(c); + i++; + } else il.append('0'); + } + return il.toString(); + } + + // + // DocumentHandler methods + // + + /** Start element. */ + public void startElement(String uri, String local, String raw, Attributes attrs) + { + if ( local.equals("hyphen-char") ) { + String h = attrs.getValue("value"); + if ( h != null && h.length() == 1 ) + hyphenChar = h.charAt(0); + } else if ( local.equals("classes") ) + currElement = ELEM_CLASSES; + else if ( local.equals("patterns") ) + currElement = ELEM_PATTERNS; + else if ( local.equals("exceptions") ) { + currElement = ELEM_EXCEPTIONS; + exception = new Vector(); + } + else if ( local.equals("hyphen") ) { + if ( token.length() > 0 ) { + exception.addElement(token.toString()); + } + exception.addElement(new Hyphen(attrs.getValue("pre"), + attrs.getValue("no"), + attrs.getValue("post"))); + currElement = ELEM_HYPHEN; + } + token.setLength(0); + } + + public void endElement(String uri, String local, String raw) + { + + if ( token.length() > 0 ) { + String word = token.toString(); + switch ( currElement ) { + case ELEM_CLASSES: + consumer.addClass(word); + break; + case ELEM_EXCEPTIONS: + exception.addElement(word); + exception = normalizeException(exception); + consumer.addException(getExceptionWord(exception), + (Vector)exception.clone()); + break; + case ELEM_PATTERNS: + consumer.addPattern(getPattern(word), getInterletterValues(word)); + break; + case ELEM_HYPHEN: + // nothing to do + break; + } + if ( currElement != ELEM_HYPHEN ) + token.setLength(0); + } + if ( currElement == ELEM_HYPHEN ) + currElement = ELEM_EXCEPTIONS; + else + currElement = 0; + + } + + /** Characters. 
*/ + public void characters(char ch[], int start, int length) + { + StringBuffer chars = new StringBuffer(length); + chars.append(ch, start, length); + String word = readToken(chars); + while ( word != null ) { + // System.out.println("\"" + word + "\""); + switch ( currElement ) { + case ELEM_CLASSES: + consumer.addClass(word); + break; + case ELEM_EXCEPTIONS: + exception.addElement(word); + exception = normalizeException(exception); + consumer.addException(getExceptionWord(exception), + (Vector)exception.clone()); + exception.removeAllElements(); + break; + case ELEM_PATTERNS: + consumer.addPattern(getPattern(word), getInterletterValues(word)); + break; + } + word = readToken(chars); + } + + } + + // + // ErrorHandler methods + // + + /** Warning. */ + public void warning(SAXParseException ex) { + errMsg = "[Warning] "+ getLocationString(ex)+": "+ ex.getMessage(); + } + + /** Error. */ + public void error(SAXParseException ex) { + errMsg = "[Error] "+ getLocationString(ex)+": "+ ex.getMessage(); + } + + /** Fatal error. */ + public void fatalError(SAXParseException ex) throws SAXException { + errMsg = "[Fatal Error] "+getLocationString(ex)+": "+ ex.getMessage(); + throw ex; + } + + /** Returns a string of the location. 
*/ + private String getLocationString(SAXParseException ex) { + StringBuffer str = new StringBuffer(); + + String systemId = ex.getSystemId(); + if (systemId != null) { + int index = systemId.lastIndexOf('/'); + if (index != -1) + systemId = systemId.substring(index + 1); + str.append(systemId); + } + str.append(':'); + str.append(ex.getLineNumber()); + str.append(':'); + str.append(ex.getColumnNumber()); + + return str.toString(); + + } // getLocationString(SAXParseException):String + + + // PatternConsumer implementation for testing purposes + public void addClass(String c) + { + System.out.println("class: " + c); + } + + public void addException(String w, Vector e) + { + System.out.println("exception: " + w + " : " + e.toString()); + } + + public void addPattern(String p, String v) + { + System.out.println("pattern: " + p + " : " + v); + } + + public static void main(String[] args) + throws Exception + { + if ( args.length > 0 ){ + PatternParser pp = new PatternParser(); + pp.setConsumer(pp); + pp.parse(args[0]); + } + } +} diff --git a/src/org/apache/fop/layout/hyphenation/TernaryTree.java b/src/org/apache/fop/layout/hyphenation/TernaryTree.java new file mode 100644 index 000000000..ec01994f3 --- /dev/null +++ b/src/org/apache/fop/layout/hyphenation/TernaryTree.java @@ -0,0 +1,690 @@ +/** -- $Id$ -- + + ============================================================================ + The Apache Software License, Version 1.1 + ============================================================================ + + Copyright (C) 1999 The Apache Software Foundation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modifica- + tion, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. The end-user documentation included with the redistribution, if any, must + include the following acknowledgment: "This product includes software + developed by the Apache Software Foundation (http://www.apache.org/)." + Alternately, this acknowledgment may appear in the software itself, if + and wherever such third-party acknowledgments normally appear. + + 4. The names "Fop" and "Apache Software Foundation" must not be used to + endorse or promote products derived from this software without prior + written permission. For written permission, please contact + apache@apache.org. + + 5. Products derived from this software may not be called "Apache", nor may + "Apache" appear in their name, without prior written permission of the + Apache Software Foundation. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU- + DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + This software consists of voluntary contributions made by many individuals + on behalf of the Apache Software Foundation and was originally created by + James Tauber <jtauber@jtauber.com>. 
For more information on the Apache + Software Foundation, please see <http://www.apache.org/>. + + */ + +package org.apache.fop.layout.hyphenation; + +import java.util.Enumeration; +import java.util.Stack; +import java.io.Serializable; + +/** + * <h2>Ternary Search Tree</h2> + * + * <p>A ternary search tree is a hybrid between a binary tree and + * a digital search tree (trie). Keys are limited to strings. + * A data value of type char is stored in each leaf node. + * It can be used as an index (or pointer) to the data. + * Branches that only contain one key are compressed to one node + * by storing a pointer to the trailer substring of the key. + * This class is intended to serve as base class or helper class + * to implement Dictionary collections or the like. Ternary trees + * have some nice properties as the following: the tree can be + * traversed in sorted order, partial matches (wildcard) can be + * implemented, retrieval of all keys within a given distance + * from the target, etc. The storage requirements are higher than + * a binary tree but a lot less than a trie. Performance is + * comparable with a hash table, sometimes it outperforms a hash + * function (most of the time can determine a miss faster than a hash).</p> + * + * <p>The main purpose of this java port is to serve as a base for + * implementing TeX's hyphenation algorithm (see The TeXBook, + * appendix H). Each language requires from 5000 to 15000 hyphenation + * patterns which will be keys in this tree. The strings patterns + * are usually small (from 2 to 5 characters), but each char in the + * tree is stored in a node. Thus memory usage is the main concern. + * We will sacrifice 'elegance' to keep memory requirements to the + * minimum. Using java's char type as pointer (yes, I know pointer + * it is a forbidden word in java) we can keep the size of the node + * to be just 8 bytes (3 pointers and the data char). This gives + * room for about 65000 nodes.
In my tests the english patterns + * took 7694 nodes and the german patterns 10055 nodes, + * so I think we are safe.</p> + * + * <p>All said, this is a map with strings as keys and char as value. + * Pretty limited!. It can be extended to a general map by + * using the string representation of an object and using the + * char value as an index to an array that contains the object + * values.</p> + * + * @author cav@uniscope.co.jp + */ + +public class TernaryTree implements Cloneable, Serializable { + + /** + * We use 4 arrays to represent a node. I guess I should have created + * a proper node class, but somehow Knuth's pascal code made me forget + * we now have a portable language with virtual memory management and + * automatic garbage collection! And now is kind of late, furthermore, + * if it ain't broken, don't fix it. + */ + + /** + * Pointer to low branch and to rest of the key when it is + * stored directly in this node, we don't have unions in java! + */ + protected char[] lo; + + /** + * Pointer to high branch. + */ + protected char[] hi; + + /** + * Pointer to equal branch and to data when this node is a string terminator. + */ + protected char[] eq; + + /** + * <P>The character stored in this node: splitchar + * Two special values are reserved:</P> + * <ul><li>0x0000 as string terminator</li> + * <li>0xFFFF to indicate that the branch starting at + * this node is compressed</li></ul> + * <p>This shouldn't be a problem if we give the usual semantics to + * strings since 0xFFFF is garanteed not to be an Unicode character.</p> + */ + protected char[] sc; + + /** + * This vector holds the trailing of the keys when the branch is compressed. 
+ */ + protected CharVector kv; + + protected char root; + protected char freenode; + protected int length; // number of items in tree + + protected final static int BLOCK_SIZE=2048; // allocation size for arrays + + TernaryTree() + { + init(); + } + + protected void init() + { + root = 0; + freenode = 1; + length = 0; + lo = new char[BLOCK_SIZE]; + hi = new char[BLOCK_SIZE]; + eq = new char[BLOCK_SIZE]; + sc = new char[BLOCK_SIZE]; + kv = new CharVector(); + } + + /** + * Branches are initially compressed, needing + * one node per key plus the size of the string + * key. They are decompressed as needed when + * another key with same prefix + * is inserted. This saves a lot of space, + * specially for long keys. + */ + public void insert(String key, char val) + { + // make sure we have enough room in the arrays + int len = key.length() + 1; // maximum number of nodes that may be generated + if ( freenode + len > eq.length ) + redimNodeArrays(eq.length + BLOCK_SIZE); + char strkey[] = new char[len--]; + key.getChars(0, len, strkey, 0); + strkey[len] = 0; + root = insert(root, strkey, 0, val); + } + + public void insert(char[] key, int start, char val) + { + int len = strlen(key) + 1; + if ( freenode + len > eq.length ) + redimNodeArrays(eq.length + BLOCK_SIZE); + root = insert(root, key, start, val); + } + + /** + * The actual insertion function, recursive version. + */ + private char insert(char p, char[] key, int start, char val) + { + int len = strlen(key, start); + if ( p == 0 ) { + // this means there is no branch, this node will start a new branch. 
+ // Instead of doing that, we store the key somewhere else and create + // only one node with a pointer to the key + p = freenode++; + eq[p] = val; //holds data + length++; + hi[p] = 0; + if ( len > 0 ) { + sc[p] = 0xFFFF; // indicates branch is compressed + lo[p] = (char)kv.alloc(len+1); // use 'lo' to hold pointer to key + strcpy(kv.getArray(), lo[p], key, start); + } else { + sc[p] = 0; + lo[p] = 0; + } + return p; + } + + if ( sc[p] == 0xFFFF ) { + // branch is compressed: need to decompress + // this will generate garbage in the external key array + // but we can do some garbage collection later + char pp = freenode++; + lo[pp] = lo[p]; // previous pointer to key + eq[pp] = eq[p]; // previous pointer to data + lo[p] = 0; + if ( len > 0 ) { + sc[p] = kv.get(lo[pp]); + eq[p] = pp; + lo[pp]++; + if ( kv.get(lo[pp]) == 0 ) { + // key completly decompressed leaving garbage in key array + lo[pp] = 0; + sc[pp] = 0; + hi[pp] = 0; + } else + sc[pp] = 0xFFFF; // we only got first char of key, rest is still there + } else { + // In this case we can save a node by swapping the new node + // with the compressed node + sc[pp] = 0xFFFF; + hi[p] = pp; + sc[p] = 0; + eq[p] = val; + length++; + return p; + } + } + char s = key[start]; + if ( s < sc[p] ) + lo[p] = insert(lo[p], key, start, val); + else if ( s == sc[p] ) { + if ( s != 0 ) + eq[p] = insert(eq[p], key, start+1, val); + else { + // key already in tree, overwrite data + eq[p] = val; + } + + } else + hi[p] = insert(hi[p], key, start, val); + return p; + } + + /** Compares 2 null terminated char arrays */ + public static int strcmp(char[] a, int startA, char[] b, int startB) + { + for(; a[startA] == b[startB]; startA++, startB++) + if ( a[startA] == 0 ) + return 0; + return a[startA] - b[startB]; + } + + /** + * Compares a string with null terminated char array + */ + public static int strcmp(String str, char[] a, int start) + { + int i,d,len=str.length(); + for(i=0; i<len; i++) { + d = (int)str.charAt(i) - 
a[start+i]; + if ( d != 0 ) + return d; + if ( a[start+i] == 0 ) + return d; + } + if ( a[start+i] != 0 ) + return (int)-a[start+i]; + return 0; + + } + + public static void strcpy(char[] dst, int di, char[] src, int si) + { + while( src[si] != 0 ) + dst[di++] = src[si++]; + dst[di] = 0; + } + + public static int strlen(char[] a, int start) + { + int len=0; + for(int i=start; i<a.length && a[i]!=0; i++) + len++; + return len; + } + + public static int strlen(char[] a) + { + return strlen(a, 0); + } + + public int find(String key) + { + int len = key.length(); + char strkey[] = new char[len+1]; + key.getChars(0, len, strkey, 0); + strkey[len] = 0; + + return find(strkey, 0); + } + + public int find(char[] key, int start) + { + int d; + char p = root; + int i=start; + char c; + + while( p != 0) { + if ( sc[p] == 0xFFFF ) { + if ( strcmp(key, i, kv.getArray(), lo[p]) == 0 ) + return eq[p]; + else + return -1; + } + c = key[i]; + d = c - sc[p]; + if ( d == 0 ) { + if ( c == 0 ) + return eq[p]; + i++; + p = eq[p]; + } else if ( d < 0 ) + p = lo[p]; + else + p = hi[p]; + } + return -1; + } + + public boolean knows(String key) + { + return (find(key) >= 0 ); + } + + // redimension the arrays + private void redimNodeArrays(int newsize) + { + int len = newsize < lo.length ? 
newsize : lo.length; + char[] na = new char[newsize]; + System.arraycopy(lo, 0, na, 0, len); + lo = na; + na = new char[newsize]; + System.arraycopy(hi, 0, na, 0, len); + hi = na; + na = new char[newsize]; + System.arraycopy(eq, 0, na, 0, len); + eq = na; + na = new char[newsize]; + System.arraycopy(sc, 0, na, 0, len); + sc = na; + } + + public int size() + { + return length; + } + + public Object clone() + { + TernaryTree t = new TernaryTree(); + t.lo = (char[])this.lo.clone(); + t.hi = (char[])this.hi.clone(); + t.eq = (char[])this.eq.clone(); + t.sc = (char[])this.sc.clone(); + t.kv = (CharVector)this.kv.clone(); + t.root = this.root; + t.freenode = this.freenode; + t.length = this.length; + + return t; + } + + /** + * Recursively insert the median first and then the median of the + * lower and upper halves, and so on in order to get a balanced + * tree. The array of keys is assumed to be sorted in ascending + * order. + */ + protected void insertBalanced(String[] k, char[] v, int offset, int n) + { + int m; + if ( n < 1) return; + m = n >> 1; + + insert(k[m+offset], v[m+offset]); + insertBalanced(k, v, offset, m); + + insertBalanced(k, v, offset+m+1, n-m-1); + } + + + /** + * Balance the tree for best search performance + */ + public void balance() + { + // System.out.print("Before root splitchar = "); System.out.println(sc[root]); + + int i=0, n = length; + String[] k = new String[n]; + char[] v = new char[n]; + Iterator iter = new Iterator(); + while ( iter.hasMoreElements() ) { + v[i] = iter.getValue(); + k[i++] = (String)iter.nextElement(); + } + init(); + insertBalanced(k, v, 0, n); + + // With uniform letter distribution sc[root] should be around 'm' + // System.out.print("After root splitchar = "); System.out.println(sc[root]); + } + + /** + * Each node stores a character (splitchar) which is part of + * some key(s). 
In a compressed branch (one that only contain + * a single string key) the trailer of the key which is not + * already in nodes is stored externally in the kv array. + * As items are inserted, key substrings decrease. + * Some substrings may completely disappear when the whole + * branch is totally decompressed. + * The tree is traversed to find the key substrings actually + * used. In addition, duplicate substrings are removed using + * a map (implemented with a TernaryTree!). + * + */ + public void trimToSize() + { + // first balance the tree for best performance + balance(); + + // redimension the node arrays + redimNodeArrays(freenode); + + // ok, compact kv array + CharVector kx = new CharVector(); + kx.alloc(1); + TernaryTree map = new TernaryTree(); + compact(kx, map, root); + kv = kx; + kv.trimToSize(); + } + + private void compact(CharVector kx, TernaryTree map, char p) + { + int k; + if (p == 0) return; + if ( sc[p] == 0xFFFF ) { + k = map.find(kv.getArray(), lo[p]); + if ( k < 0 ) { + k = kx.alloc(strlen(kv.getArray(),lo[p])+1); + strcpy(kx.getArray(), k, kv.getArray(), lo[p]); + map.insert(kx.getArray(), k, (char)k); + } + lo[p] = (char)k; + } else { + compact(kx, map, lo[p]); + if ( sc[p] != 0 ) + compact(kx, map, eq[p]); + compact(kx, map, hi[p]); + } + } + + + public Enumeration keys() + { + return new Iterator(); + } + + public class Iterator implements Enumeration { + + /** current node index */ + int cur; + + /** current key */ + String curkey; + + private class Item implements Cloneable { + char parent; + char child; + + public Item() + { + parent = 0; + child = 0; + } + + public Item(char p, char c) + { + parent = p; + child = c; + } + + public Object clone() + { + return new Item(parent, child); + } + } + + /** Node stack */ + Stack ns; + + /** key stack implemented with a StringBuffer */ + StringBuffer ks; + + public Iterator() + { + cur = -1; + ns = new Stack(); + ks = new StringBuffer(); + rewind(); + } + + public void rewind() + { + 
ns.clear(); + ks.setLength(0); + cur = root; + run(); + } + + public Object nextElement() + { + String res = new String(curkey); + cur = up(); + run(); + return res; + } + + public char getValue() + { + if ( cur >= 0 ) + return eq[cur]; + return 0; + } + + public boolean hasMoreElements() + { + return (cur != -1); + } + + /** traverse upwards */ + private int up() + { + Item i = new Item(); + int res=0; + + if ( ns.empty() ) + return -1; + + if ( cur != 0 && sc[cur] == 0 ) + return lo[cur]; + + boolean climb=true; + + while(climb) { + i = (Item)ns.pop(); + i.child++; + switch( i.child ) { + case 1: + if ( sc[i.parent] != 0 ) { + res = eq[i.parent]; + ns.push(i.clone()); + ks.append(sc[i.parent]); + } else { + i.child++; + ns.push(i.clone()); + res = hi[i.parent]; + } + climb = false; + break; + + case 2: + res = hi[i.parent]; + ns.push(i.clone()); + if ( ks.length() > 0 ) + ks.setLength(ks.length()-1); // pop + climb=false; + break; + + default: + if ( ns.empty() ) + return -1; + climb = true; + break; + } + } + return res; + } + + /** traverse the tree to find next key */ + private int run() + { + if ( cur == -1 ) + return -1; + + boolean leaf = false; + for(;;) { + // first go down on low branch until leaf or compressed branch + while ( cur != 0 ) { + if( sc[cur] == 0xFFFF ) { + leaf = true; + break; + } + ns.push( new Item((char)cur, '\u0000') ); + if ( sc[cur] == 0 ) { + leaf = true; + break; + } + cur = lo[cur]; + } + if ( leaf ) break; + // nothing found, go up one node and try again + cur = up(); + if ( cur == -1 ) { + return -1; + } + } + // The current node should be a data node and + // the key should be in the key stack (at least partially) + StringBuffer buf = new StringBuffer(ks.toString()); + if ( sc[cur] == 0xFFFF ) { + int p = lo[cur]; + while( kv.get(p) != 0 ) + buf.append(kv.get(p++)); + } + curkey = buf.toString(); + return 0; + } + + } + + public void printStats() + { + System.out.println("Number of keys = " + Integer.toString(length)); + 
System.out.println("Node count = " + Integer.toString(freenode)); + // System.out.println("Array length = " + Integer.toString(eq.length)); + System.out.println("Key Array length = " + Integer.toString(kv.length())); + + /* + for(int i=0; i<kv.length(); i++) + if ( kv.get(i) != 0 ) + System.out.print(kv.get(i)); + else + System.out.println(""); + + System.out.println("Keys:"); + for(Enumeration enum = keys(); enum.hasMoreElements(); ) + System.out.println(enum.nextElement()); + */ + + } + + public static void main(String[] args) + throws Exception + { + TernaryTree tt = new TernaryTree(); + tt.insert("Carlos", 'C'); + tt.insert("Car", 'r'); + tt.insert("palos", 'l'); + tt.insert("pa", 'p'); + tt.trimToSize(); + System.out.println((char)tt.find("Car")); + System.out.println((char)tt.find("Carlos")); + System.out.println((char)tt.find("alto")); + tt.printStats(); + } +} + |