From: Simon Pepping Date: Sat, 28 Jan 2006 15:57:59 +0000 (+0000) Subject: An addition to FOP that can be used to render a hyphenation tree in X-Git-Tag: fop-0_92-beta~163 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=af24371d6a82826460535fb2bc91e5355b201659;p=xmlgraphics-fop.git An addition to FOP that can be used to render a hyphenation tree in various ways, among which a complete tree representation. And correction of a few typos in HyphenationTree. git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@373170 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/examples/hyphenation/README b/examples/hyphenation/README new file mode 100644 index 000000000..1b34a9e64 --- /dev/null +++ b/examples/hyphenation/README @@ -0,0 +1,9 @@ +FOP Hyphenation Readme + +The hyphenation analysis is an addition to FOP that can be used to +render a hyphenation tree in various ways, among which a complete tree +representation. + +Building: ant compile + +Running: \ No newline at end of file diff --git a/examples/hyphenation/analyse b/examples/hyphenation/analyse new file mode 100755 index 000000000..dc8ae425d --- /dev/null +++ b/examples/hyphenation/analyse @@ -0,0 +1,246 @@ +#! /bin/sh +# +# Copyright 2006 The Apache Software Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Shell script to run analysis, adapted from the fop shell script + +rpm_mode=true +analyse_exec_args= +no_config=false +analyse_exec_debug=false +show_help=false +for arg in "$@" ; do + if [ "$arg" = "--noconfig" ] ; then + no_config=true + elif [ "$arg" = "--execdebug" ] ; then + analyse_exec_debug=true + elif [ my"$arg" = my"--h" -o my"$arg" = my"--help" ] ; then + show_help=true + analyse_exec_args="$analyse_exec_args -h" + else + if [ my"$arg" = my"-h" -o my"$arg" = my"-help" ] ; then + show_help=true + fi + analyse_exec_args="$analyse_exec_args \"$arg\"" + fi +done + +# Source/default fop configuration +if $no_config ; then + rpm_mode=false +else + # load system-wide fop configuration + if [ -f "/etc/fop.conf" ] ; then + . /etc/fop.conf + fi + + # load user fop configuration + if [ -f "$HOME/.fop/fop.conf" ] ; then + . $HOME/.fop/fop.conf + fi + if [ -f "$HOME/.foprc" ] ; then + . "$HOME/.foprc" + fi + + # provide default configuration values + if [ -z "$rpm_mode" ] ; then + rpm_mode=false + fi + if [ -z "$usejikes" ] ; then + usejikes=$use_jikes_default + fi +fi + +# Setup Java environment in rpm mode +if $rpm_mode ; then + if [ -f /usr/share/java-utils/java-functions ] ; then + . /usr/share/java-utils/java-functions + set_jvm + set_javacmd + fi +fi + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false; +darwin=false; +case "`uname`" in + CYGWIN*) cygwin=true ;; + Darwin*) darwin=true + if [ -z "$JAVA_HOME" ] ; then + JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Home + fi + ;; +esac + +if [ -z "$FOP_HOME" -o ! 
-d "$FOP_HOME" ] ; then + ## resolve links - $0 may be a link to fop's home + PRG="$0" + progname=`basename "$0"` + + # need this for relative symlinks + while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi + done + + FOP_HOME=../../`dirname "$PRG"` + + # make it fully qualified + FOP_HOME=`cd "$FOP_HOME" && pwd` +fi + +# For Cygwin, ensure paths are in UNIX format before anything is touched +if $cygwin ; then + [ -n "$FOP_HOME" ] && + FOP_HOME=`cygpath --unix "$FOP_HOME"` + [ -n "$JAVA_HOME" ] && + JAVA_HOME=`cygpath --unix "$JAVA_HOME"` +fi + +if [ -z "$JAVACMD" ] ; then + if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + else + JAVACMD=`which java 2> /dev/null ` + if [ -z "$JAVACMD" ] ; then + JAVACMD=java + fi + fi +fi + +if [ ! -x "$JAVACMD" ] ; then + echo "Error: JAVA_HOME is not defined correctly." 
+ echo " We cannot execute $JAVACMD" + exit 1 +fi + +if [ -n "$CLASSPATH" ] ; then + LOCALCLASSPATH=$CLASSPATH +fi + +# add the hyphenation analysis build directory +LOCALCLASSPATH=${FOP_HOME}/examples/hyphenation/build:$LOCALCLASSPATH + +# add fop.jar, fop-sandbox and fop-hyph.jar, which reside in $FOP_HOME/build +LOCALCLASSPATH=${FOP_HOME}/build/fop.jar:${FOP_HOME}/build/fop-sandbox.jar:${FOP_HOME}/build/fop-hyph.jar:$LOCALCLASSPATH + +# add in the dependency .jar files, which reside in $FOP_HOME/lib +DIRLIBS=${FOP_HOME}/lib/*.jar +for i in ${DIRLIBS} +do + # if the directory is empty, then it will return the input string + # this is stupid, so case for it + if [ "$i" != "${DIRLIBS}" ] ; then + if [ -z "$LOCALCLASSPATH" ] ; then + LOCALCLASSPATH=$i + else + LOCALCLASSPATH="$i":$LOCALCLASSPATH + fi + fi +done + +# add in user-defined hyphenation JARs +if [ -n "$FOP_HYPHENATION_PATH" ] ; then + LOCALCLASSPATH=$LOCALCLASSPATH:$FOP_HYPHENATION_PATH +fi + +# For Cygwin, switch paths to appropriate format before running java +# For PATHs convert to unix format first, then to windows format to ensure +# both formats are supported. Probably this will fail on directories with ; +# in the name in the path. Let's assume that paths containing ; are more +# rare than windows style paths on cygwin. 
+if $cygwin; then + if [ "$OS" = "Windows_NT" ] && cygpath -m .>/dev/null 2>/dev/null ; then + format=mixed + else + format=windows + fi + FOP_HOME=`cygpath --$format "$FOP_HOME"` + LCP_TEMP=`cygpath --path --unix "$LOCALCLASSPATH"` + LOCALCLASSPATH=`cygpath --path --$format "$LCP_TEMP"` + if [ -n "$CLASSPATH" ] ; then + CP_TEMP=`cygpath --path --unix "$CLASSPATH"` + CLASSPATH=`cygpath --path --$format "$CP_TEMP"` + fi + CYGHOME=`cygpath --$format "$HOME"` +fi + +# Show script help if requested +if $show_help ; then + analyse_exec_args="" + echo $0 '[script options] [FOP options]' + echo 'Script Options:' + echo ' --help, -h print this message and FOP help' + echo ' --noconfig suppress sourcing of /etc/fop.conf,' + echo ' $HOME/.fop/fop.conf, and $HOME/.foprc' + echo ' configuration files' + echo ' --execdebug print FOP exec line generated by this' + echo ' launch script' +fi + +# add a second backslash to variables terminated by a backslash under cygwin +if $cygwin; then + case "$FOP_HOME" in + *\\ ) + FOP_HOME="$FOP_HOME\\" + ;; + esac + case "$CYGHOME" in + *\\ ) + CYGHOME="$CYGHOME\\" + ;; + esac + case "$LOCALCLASSPATH" in + *\\ ) + LOCALCLASSPATH="$LOCALCLASSPATH\\" + ;; + esac + case "$CLASSPATH" in + *\\ ) + CLASSPATH="$CLASSPATH\\" + ;; + esac +fi + +# The default commons logger for JDK1.4 is JDK1.4Logger. +# To use a different logger, uncomment the one desired below +# LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.NoOpLog +# LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.SimpleLog +# LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger + +# Logging levels +# Below option is only if you are using SimpleLog instead of the default JDK1.4 Logger. +# To set logging levels for JDK 1.4 Logger, edit the %JAVA_HOME%/JRE/LIB/logging.properties +# file instead. +# Possible SimpleLog values: "trace", "debug", "info" (default), "warn", "error", or "fatal". 
+# LOGLEVEL=-Dorg.apache.commons.logging.simplelog.defaultlog=INFO + +# Execute analyse using eval/exec to preserve spaces in paths, +# java options, and analyse args +analyse_exec_command="exec \"$JAVACMD\" $LOGCHOICE $LOGLEVEL -classpath \"$LOCALCLASSPATH\" $FOP_OPTS org.apache.fop.hyphenation.HyphenationTreeAnalysis $analyse_exec_args" +if $analyse_exec_debug ; then + echo $analyse_exec_command +fi +eval $analyse_exec_command diff --git a/examples/hyphenation/analyse.cmd b/examples/hyphenation/analyse.cmd new file mode 100644 index 000000000..96ec0951a --- /dev/null +++ b/examples/hyphenation/analyse.cmd @@ -0,0 +1,60 @@ +@ECHO OFF + +rem %~dp0 is the expanded pathname of the current script under NT +set LOCAL_FOP_HOME= +if "%OS%"=="Windows_NT" set LOCAL_FOP_HOME=..\..\%~dp0 + +rem Code from Apache Ant project +rem Slurp the command line arguments. This loop allows for an unlimited number +rem of arguments (up to the command line limit, anyway). +rem Could also do a "shift" and "%*" for all params, but apparently doesn't work +rem with Win9x. +set ANALYSE_CMD_LINE_ARGS=%1 +if ""%1""=="""" goto doneStart +shift +:setupArgs +if ""%1""=="""" goto doneStart +set ANALYSE_CMD_LINE_ARGS=%ANALYSE_CMD_LINE_ARGS% %1 +shift +goto setupArgs +rem This label provides a place for the argument list loop to break out +rem and for NT handling to skip to. +:doneStart + +set LOGCHOICE= +rem The default commons logger for JDK1.4 is JDK1.4Logger. +rem To use a different logger, uncomment the one desired below +rem set LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.NoOpLog +rem set LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.SimpleLog +rem set LOGCHOICE=-Dorg.apache.commons.logging.Log=org.apache.commons.logging.impl.Log4JLogger + +set LOGLEVEL= +rem Logging levels +rem Below option is only if you are using SimpleLog instead of the default JDK1.4 Logger. 
+rem To set logging levels for JDK 1.4 Logger, edit the %JAVA_HOME%\JRE\LIB\logging.properties +rem file instead. +rem Possible SimpleLog values: "trace", "debug", "info" (default), "warn", "error", or "fatal". +rem set LOGLEVEL=-Dorg.apache.commons.logging.simplelog.defaultlog=INFO + +set LIBDIR=%LOCAL_FOP_HOME%lib + +set LOCALCLASSPATH=%LOCAL_FOP_HOME%examples\hyphenation\build +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LOCAL_FOP_HOME%build\fop.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LOCAL_FOP_HOME%build\fop-sandbox.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LOCAL_FOP_HOME%build\fop-hyph.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\xml-apis-1.3.02.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\xercesImpl-2.7.1.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\xalan-2.7.0.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\serializer-2.7.0.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\batik-all-1.6.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\avalon-framework-4.2.0.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\commons-io-1.1.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\commons-logging-1.0.4.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\jimi-1.0.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\jai_core.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\jai_codec.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%LIBDIR%\fop-hyph.jar +set LOCALCLASSPATH=%LOCALCLASSPATH%;%FOP_HYPHENATION_PATH% + +java %LOGCHOICE% %LOGLEVEL% -cp "%LOCALCLASSPATH%" org.apache.fop.hyphenation.HyphenationTreeAnalysis %ANALYSE_CMD_LINE_ARGS% + diff --git a/examples/hyphenation/build.xml b/examples/hyphenation/build.xml new file mode 100644 index 000000000..ae20354ce --- /dev/null +++ b/examples/hyphenation/build.xml @@ -0,0 +1,90 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/hyphenation/src/org/apache/fop/hyphenation/HyphenationTreeAnalysis.java 
b/examples/hyphenation/src/org/apache/fop/hyphenation/HyphenationTreeAnalysis.java new file mode 100644 index 000000000..1349c5bf4 --- /dev/null +++ b/examples/hyphenation/src/org/apache/fop/hyphenation/HyphenationTreeAnalysis.java @@ -0,0 +1,415 @@ +/* + * Copyright 2006 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.hyphenation; + +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.FileReader; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides some useful methods to print the structure of a HyphenationTree object + */ +public class HyphenationTreeAnalysis extends TernaryTreeAnalysis { + + /** + * The HyphenationTree object to analyse + */ + protected HyphenationTree ht; + + /** + * @param ht the HyphenationTree object + */ + public HyphenationTreeAnalysis(HyphenationTree ht) { + super(ht); + this.ht = ht; + } + + /** + * Class representing a node of the HyphenationTree object + */ + protected class Node extends TernaryTreeAnalysis.Node { + private String value = null; + + /** + * @param index the index of the node + */ + protected Node(int index) { + super(index); + if (isLeafNode) { + value = readValue().toString(); + } + } + + private StringBuffer readValue() { + StringBuffer s = new 
StringBuffer(); + int i = (int) ht.eq[index]; + byte v = ht.vspace.get(i); + for (; v != 0; v = ht.vspace.get(++i)) { + int c = (int) ((v >>> 4) - 1); + s.append(c); + c = (int) (v & 0x0f); + if (c == 0) { + break; + } + c = (c - 1); + s.append(c); + } + return s; + } + + /* (non-Javadoc) + * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toNodeString() + */ + public String toNodeString() { + if (isLeafNode) { + StringBuffer s = new StringBuffer(); + s.append("-" + index); + if (isPacked) { + s.append(",=>'" + key + "'"); + } + s.append("," + value); + s.append(",leaf"); + return s.toString(); + } else { + return super.toNodeString(); + } + } + + /* (non-Javadoc) + * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toCompactString() + */ + public String toCompactString() { + if (isLeafNode) { + StringBuffer s = new StringBuffer(); + s.append("-" + index); + if (isPacked) { + s.append(",=>'" + key + "'"); + } + s.append("," + value); + s.append(",leaf\n"); + return s.toString(); + } else { + return super.toCompactString(); + } + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + StringBuffer s = new StringBuffer(); + s.append(super.toString()); + if (isLeafNode) { + s.append("value: " + value + "\n"); + } + return s.toString(); + } + + } + + private void addNode(int nodeIndex, List strings, NodeString ns) { + int pos = ns.indent + ns.string.length() + 1; + Node n = new Node(nodeIndex); + ns.string.append(n.toNodeString()); + if (n.high != 0) { + ns.high.add(new Integer(pos)); + NodeString highNs = new NodeString(pos); + highNs.low.add(new Integer(pos)); + int index = strings.indexOf(ns); + strings.add(index, highNs); + addNode(n.high, strings, highNs); + } + if (n.low != 0) { + ns.low.add(new Integer(pos)); + NodeString lowNs = new NodeString(pos); + lowNs.high.add(new Integer(pos)); + int index = strings.indexOf(ns); + strings.add(index + 1, lowNs); + addNode(n.low, strings, lowNs); + } + if 
(!n.isLeafNode) { + addNode(n.equal, strings, ns); + } + + } + + /** + * Construct the tree representation of a list of node strings + * @param strings the list of node strings + * @return the string representing the tree + */ + public String toTree(List strings) { + StringBuffer indentString = new StringBuffer(); + for (int j = indentString.length(); j < ((NodeString) strings.get(0)).indent; ++j) { + indentString.append(' '); + } + StringBuffer tree = new StringBuffer(); + for (int i = 0; i < strings.size(); ++i) { + NodeString ns = (NodeString) strings.get(i); + if (indentString.length() > ns.indent) { + indentString.setLength(ns.indent); + } else { + // should not happen + for (int j = indentString.length(); j < ns.indent; ++j) { + indentString.append(' '); + } + } + tree.append(indentString); + tree.append(ns.string + "\n"); + + if (i + 1 == strings.size()) { + continue; + } + for (int j = 0; j < ns.low.size(); ++j) { + int pos = ((Integer) ns.low.get(j)).intValue(); + if (pos < indentString.length()) { + indentString.setCharAt(pos, '|'); + } else { + for (int k = indentString.length(); k < pos; ++k) { + indentString.append(' '); + } + indentString.append('|'); + } + } + tree.append(indentString + "\n"); + } + + return tree.toString(); + } + + /** + * Construct the tree representation of the HyphenationTree object + * @return the string representing the tree + */ + public String toTree() { + List strings = new ArrayList(); + NodeString ns = new NodeString(0); + strings.add(ns); + addNode(1, strings, ns); + return toTree(strings); + } + + /** + * Construct the compact node representation of the HyphenationTree object + * @return the string representing the tree + */ + public String toCompactNodes() { + StringBuffer s = new StringBuffer(); + for (int i = 1; i < ht.sc.length; ++i) { + if (i != 1) { + s.append("\n"); + } + s.append((new Node(i)).toCompactString()); + } + return s.toString(); + } + + /** + * Construct the node representation of the HyphenationTree 
object + * @return the string representing the tree + */ + public String toNodes() { + StringBuffer s = new StringBuffer(); + for (int i = 1; i < ht.sc.length; ++i) { + if (i != 1) { + s.append("\n"); + } + s.append((new Node(i)).toString()); + } + return s.toString(); + } + + /** + * Construct the printed representation of the HyphenationTree object + * @return the string representing the tree + */ + public String toString() { + StringBuffer s = new StringBuffer(); + + s.append("classes: \n"); + s.append((new TernaryTreeAnalysis(ht.classmap)).toString()); + + s.append("\npatterns: \n"); + s.append(super.toString()); + s.append("vspace: "); + for (int i = 0; i < ht.vspace.length(); ++i) { + byte v = ht.vspace.get(i); + if (v == 0) { + s.append("--"); + } else { + int c = (int) ((v >>> 4) - 1); + s.append(c); + c = (int) (v & 0x0f); + if (c == 0) { + s.append("-"); + } else { + c = (c - 1); + s.append(c); + } + } + } + s.append("\n"); + + return s.toString(); + } + + /** + * Provide interactive access to a HyphenationTree object and its representation methods + * @param args the arguments + */ + public static void main(String[] args) throws Exception { + HyphenationTree ht = null; + HyphenationTreeAnalysis hta = null; + int minCharCount = 2; + BufferedReader in = new BufferedReader(new java.io.InputStreamReader(System.in)); + while (true) { + System.out.print("l:\tload patterns from XML\n" + + "L:\tload patterns from serialized object\n" + + "s:\tset minimun character count\n" + + "w:\twrite hyphenation tree to object file\n" + + "p:\tprint hyphenation tree to stdout\n" + + "n:\tprint hyphenation tree nodes to stdout\n" + + "c:\tprint compact hyphenation tree nodes to stdout\n" + + "t:\tprint tree representation of hyphenation tree to stdout\n" + + "h:\thyphenate\n" + + "f:\tfind pattern\n" + + "b:\tbenchmark\n" + + "q:\tquit\n\n" + + "Command:"); + String token = in.readLine().trim(); + if (token.equals("f")) { + System.out.print("Pattern: "); + token = 
in.readLine().trim(); + System.out.println("Values: " + ht.findPattern(token)); + } else if (token.equals("s")) { + System.out.print("Minimum value: "); + token = in.readLine().trim(); + minCharCount = Integer.parseInt(token); + } else if (token.equals("l")) { + ht = new HyphenationTree(); + hta = new HyphenationTreeAnalysis(ht); + System.out.print("XML file name: "); + token = in.readLine().trim(); + ht.loadPatterns(token); + } else if (token.equals("L")) { + ObjectInputStream ois = null; + System.out.print("Object file name: "); + token = in.readLine().trim(); + try { + ois = new ObjectInputStream(new FileInputStream(token)); + ht = (HyphenationTree) ois.readObject(); + hta = new HyphenationTreeAnalysis(ht); + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (ois != null) { + try { + ois.close(); + } catch (IOException e) { + //ignore + } + } + } + } else if (token.equals("w")) { + System.out.print("Object file name: "); + token = in.readLine().trim(); + ObjectOutputStream oos = null; + try { + oos = new ObjectOutputStream(new FileOutputStream(token)); + oos.writeObject(ht); + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (oos != null) { + try { + oos.flush(); + } catch (IOException e) { + //ignore + } + try { + oos.close(); + } catch (IOException e) { + //ignore + } + } + } + } else if (token.equals("p")) { + System.out.print(hta); + } else if (token.equals("n")) { + System.out.print(hta.toNodes()); + } else if (token.equals("c")) { + System.out.print(hta.toCompactNodes()); + } else if (token.equals("t")) { + System.out.print(hta.toTree()); + } else if (token.equals("h")) { + System.out.print("Word: "); + token = in.readLine().trim(); + System.out.print("Hyphenation points: "); + System.out.println(ht.hyphenate(token, minCharCount, + minCharCount)); + } else if (token.equals("b")) { + if (ht == null) { + System.out.println("No patterns have been loaded."); + break; + } + System.out.print("Word list filename: "); + token = 
in.readLine().trim(); + long starttime = 0; + int counter = 0; + try { + BufferedReader reader = new BufferedReader(new FileReader(token)); + String line; + + starttime = System.currentTimeMillis(); + while ((line = reader.readLine()) != null) { + // System.out.print("\nline: "); + Hyphenation hyp = ht.hyphenate(line, minCharCount, + minCharCount); + if (hyp != null) { + String hword = hyp.toString(); + // System.out.println(line); + // System.out.println(hword); + } else { + // System.out.println("No hyphenation"); + } + counter++; + } + } catch (Exception ioe) { + System.out.println("Exception " + ioe); + ioe.printStackTrace(); + } + long endtime = System.currentTimeMillis(); + long result = endtime - starttime; + System.out.println(counter + " words in " + result + + " Milliseconds hyphenated"); + + } else if (token.equals("q")) { + break; + } + } + + } + +} diff --git a/examples/hyphenation/src/org/apache/fop/hyphenation/TernaryTreeAnalysis.java b/examples/hyphenation/src/org/apache/fop/hyphenation/TernaryTreeAnalysis.java new file mode 100644 index 000000000..a5399475c --- /dev/null +++ b/examples/hyphenation/src/org/apache/fop/hyphenation/TernaryTreeAnalysis.java @@ -0,0 +1,300 @@ +/* + * Copyright 2006 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ + +package org.apache.fop.hyphenation; + +import java.util.ArrayList; +import java.util.List; + +/** + * This class provides some useful methods to print the structure of a TernaryTree object + */ +public class TernaryTreeAnalysis { + + /** + * The TernaryTree object to analyse + */ + protected TernaryTree tt; + + /** + * @param tt the TernaryTree object + */ + public TernaryTreeAnalysis(TernaryTree tt) { + this.tt = tt; + } + + /** + * Class representing a string of nodes in the tree representation of a TernaryTree + */ + public static class NodeString { + + /** + * The node string being constructed + */ + public StringBuffer string = new StringBuffer(); + + /** + * The indent of the node string + */ + public int indent; + + /** + * The list of branchpoints into the high direction + */ + public List high = new ArrayList(); + + /** + * The list of branchpoints into the low direction + */ + public List low = new ArrayList(); + + /** + * @param indent the indent of the nodestring + */ + public NodeString(int indent) { + this.indent = indent; + string.append("+"); + } + + } + + /** + * Class representing a node of the TernaryTree object + */ + protected class Node { + + /** + * The index of the node + */ + protected int index = 0; + + /** + * The index of the high node + */ + protected int high = 0; + + /** + * The index of the low node + */ + protected int low = 0; + + /** + * The index of the equal node + */ + protected int equal = 0; + + /** + * The key following the node + */ + protected String key = null; + + /** + * True if this is a leaf node + */ + protected boolean isLeafNode = false; + + /** + * True if this is a packed node + */ + protected boolean isPacked = false; + + /** + * @param index the index of the node + */ + protected Node(int index) { + this.index = index; + if (tt.sc[index] == 0) { + isLeafNode = true; + } else if (tt.sc[index] == 0xFFFF) { + isLeafNode = true; + isPacked = true; + key = readKey().toString(); + } else { + 
key = new String(tt.sc, index, 1); + high = tt.hi[index]; + low = tt.lo[index]; + equal = tt.eq[index]; + } + } + + private StringBuffer readKey() { + StringBuffer s = new StringBuffer(); + int i = (int) tt.lo[index]; + char c = tt.kv.get(i); + for (; c != 0; c = tt.kv.get(++i)) { + s.append(c); + } + return s; + } + + /** + * Construct the string representation of the node + * @return the string representing the node + */ + public String toNodeString() { + StringBuffer s = new StringBuffer(); + if (isLeafNode) { + s.append("-" + index); + if (isPacked) { + s.append(",=>'" + key + "'"); + } + s.append(",leaf"); + } else { + s.append("-" + index + "--" + key + "-"); + } + return s.toString(); + } + + /** + * Construct the compact string representation of the node + * @return the string representing the node + */ + public String toCompactString() { + StringBuffer s = new StringBuffer(); + if (isLeafNode) { + s.append("-" + index); + if (isPacked) { + s.append(",=>'" + key + "'"); + } + s.append(",leaf\n"); + } else { + if (high != 0) { + s.append("(+-" + high + ")\n |\n"); + } + s.append("-" + index + "- " + key + " (-" + equal + ")\n"); + if (low != 0) { + s.append(" |\n(+-" + low + ")\n"); + } + } + return s.toString(); + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + StringBuffer s = new StringBuffer(); + s.append("Node " + index + ":\n"); + if (isLeafNode) { + if (isPacked) { + s.append("key: " + key + "\n"); + } + } else { + s.append("high: " + (high == 0 ? "-" : String.valueOf(high)) + + ", equal: " + equal + + ", low: " + (low == 0 ? 
"-" : String.valueOf(low)) + + "\n"); + s.append("key: " + key + "\n"); + } + return s.toString(); + } + + } + + /** + * Construct the compact node representation of the TernaryTree object + * @return the string representing the tree + */ + public String toCompactNodes() { + StringBuffer s = new StringBuffer(); + for (int i = 1; i < tt.sc.length; ++i) { + if (i != 1) { + s.append("\n"); + } + s.append((new Node(i)).toCompactString()); + } + return s.toString(); + } + + /** + * Construct the node representation of the TernaryTree object + * @return the string representing the tree + */ + public String toNodes() { + StringBuffer s = new StringBuffer(); + for (int i = 1; i < tt.sc.length; ++i) { + if (i != 1) { + s.append("\n"); + } + s.append((new Node(i)).toString()); + } + return s.toString(); + } + + private static StringBuffer toString(char[] c) { + StringBuffer s = new StringBuffer(); + for (int i = 0; i < c.length; ++i) { + s.append((int) c[i]); + s.append(","); + } + return s; + } + + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ + public String toString() { + StringBuffer s = new StringBuffer(); + + s.append("hi: "); + s.append(toString(tt.hi)); + s.append("\n"); + + s.append("eq: "); + s.append(toString(tt.eq)); + s.append("\n"); + + s.append("lo: "); + s.append(toString(tt.lo)); + s.append("\n"); + + s.append("sc: "); + for (int i = 0; i < tt.sc.length; ++i) { + if (tt.sc[i] == 0) { + s.append("-"); + } else if (tt.sc[i] == 0xFFFF) { + s.append("^"); + } else { + s.append(tt.sc[i]); + } + } + s.append("\n"); + + s.append("kv: "); + for (int i = 0; i < tt.kv.length(); ++i) { + if (tt.kv.get(i) == 0) { + s.append("-"); + } else { + s.append(tt.kv.get(i)); + } + } + s.append("\n"); + + s.append("freenode: "); + s.append((int) tt.freenode); + s.append("\n"); + + s.append("root: "); + s.append((int) tt.root); + s.append("\n"); + + return s.toString(); + } + + +} diff --git a/src/java/org/apache/fop/hyphenation/HyphenationTree.java 
b/src/java/org/apache/fop/hyphenation/HyphenationTree.java index 51a1a875e..a726d9501 100644 --- a/src/java/org/apache/fop/hyphenation/HyphenationTree.java +++ b/src/java/org/apache/fop/hyphenation/HyphenationTree.java @@ -40,7 +40,7 @@ public class HyphenationTree extends TernaryTree implements PatternConsumer, Serializable { /** - * value space: stores the inteletter values + * value space: stores the interletter values */ protected ByteVector vspace; @@ -469,7 +469,7 @@ public class HyphenationTree extends TernaryTree while (true) { System.out.print("l:\tload patterns from XML\n" + "L:\tload patterns from serialized object\n" - + "s:\tset minimun character count\n" + + "s:\tset minimum character count\n" + "w:\twrite hyphenation tree to object file\n" + "h:\thyphenate\n" + "f:\tfind pattern\n" @@ -539,7 +539,7 @@ public class HyphenationTree extends TernaryTree minCharCount)); } else if (token.equals("b")) { if (ht == null) { - System.out.println("No patterns has been loaded."); + System.out.println("No patterns have been loaded."); break; } System.out.print("Word list filename: "); @@ -572,7 +572,7 @@ public class HyphenationTree extends TernaryTree long endtime = System.currentTimeMillis(); long result = endtime - starttime; System.out.println(counter + " words in " + result - + " Millisekunden hyphenated"); + + " Milliseconds hyphenated"); } else if (token.equals("q")) { break;