aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/fop/hyphenation/PatternParser.java
diff options
context:
space:
mode:
authorGlen Mazza <gmazza@apache.org>2004-09-06 18:03:12 +0000
committerGlen Mazza <gmazza@apache.org>2004-09-06 18:03:12 +0000
commit04e3d089d6faef74065f9de7fdbec0862f97669b (patch)
treeaaec09825d8cea2eac0dd86ee239f1a52be71b47 /src/java/org/apache/fop/hyphenation/PatternParser.java
parenta0831593665d120fcd6da96b0c33795e21531818 (diff)
downloadxmlgraphics-fop-04e3d089d6faef74065f9de7fdbec0862f97669b.tar.gz
xmlgraphics-fop-04e3d089d6faef74065f9de7fdbec0862f97669b.zip
PR:
Obtained from: Submitted by: Reviewed by: Moved hyphenation package to org.apache.fop.hyphenation git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@197909 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/fop/hyphenation/PatternParser.java')
-rw-r--r--src/java/org/apache/fop/hyphenation/PatternParser.java433
1 files changed, 433 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/hyphenation/PatternParser.java b/src/java/org/apache/fop/hyphenation/PatternParser.java
new file mode 100644
index 000000000..bde436f82
--- /dev/null
+++ b/src/java/org/apache/fop/hyphenation/PatternParser.java
@@ -0,0 +1,433 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.hyphenation;
+
+// SAX
+import org.xml.sax.XMLReader;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.Attributes;
+
+// Java
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.net.URL;
+
+/**
+ * A SAX document handler to read and parse hyphenation patterns
+ * from a XML file.
+ *
+ * @author Carlos Villegas <cav@uniscope.co.jp>
+ */
+public class PatternParser extends DefaultHandler implements PatternConsumer {
+
+ XMLReader parser;
+ int currElement;
+ PatternConsumer consumer;
+ StringBuffer token;
+ ArrayList exception;
+ char hyphenChar;
+ String errMsg;
+
+ static final int ELEM_CLASSES = 1;
+ static final int ELEM_EXCEPTIONS = 2;
+ static final int ELEM_PATTERNS = 3;
+ static final int ELEM_HYPHEN = 4;
+
+ public PatternParser() throws HyphenationException {
+ token = new StringBuffer();
+ parser = createParser();
+ parser.setContentHandler(this);
+ parser.setErrorHandler(this);
+ hyphenChar = '-'; // default
+
+ }
+
+ public PatternParser(PatternConsumer consumer)
+ throws HyphenationException {
+ this();
+ this.consumer = consumer;
+ }
+
+ public void setConsumer(PatternConsumer consumer) {
+ this.consumer = consumer;
+ }
+
+ public void parse(String filename) throws HyphenationException {
+ InputSource uri = fileInputSource(filename);
+
+ try {
+ parser.parse(uri);
+ } catch (SAXException e) {
+ throw new HyphenationException(errMsg);
+ } catch (IOException e) {
+ throw new HyphenationException(e.getMessage());
+ } catch (NullPointerException e) {
+ throw new HyphenationException("SAX parser not available");
+ }
+ }
+
+ /**
+ * creates a SAX parser, using the value of org.xml.sax.parser
+ * defaulting to org.apache.xerces.parsers.SAXParser
+ *
+ * @return the created SAX parser
+ */
+ static XMLReader createParser() throws HyphenationException {
+ String parserClassName = System.getProperty("org.xml.sax.parser");
+ if (parserClassName == null) {
+ parserClassName = "org.apache.xerces.parsers.SAXParser";
+ }
+ // System.out.println("using SAX parser " + parserClassName);
+
+ try {
+ return (XMLReader)Class.forName(parserClassName).newInstance();
+ } catch (ClassNotFoundException e) {
+ throw new HyphenationException("Could not find "
+ + parserClassName);
+ } catch (InstantiationException e) {
+ throw new HyphenationException("Could not instantiate "
+ + parserClassName);
+ } catch (IllegalAccessException e) {
+ throw new HyphenationException("Could not access "
+ + parserClassName);
+ } catch (ClassCastException e) {
+ throw new HyphenationException(parserClassName
+ + " is not a SAX driver");
+ }
+ }
+
+ /**
+ * create an InputSource from a file name
+ *
+ * @param filename the name of the file
+ * @return the InputSource created
+ */
+ protected static InputSource fileInputSource(String filename)
+ throws HyphenationException {
+
+ /* this code adapted from James Clark's in XT */
+ File file = new File(filename);
+ String path = file.getAbsolutePath();
+ String fSep = System.getProperty("file.separator");
+ if (fSep != null && fSep.length() == 1) {
+ path = path.replace(fSep.charAt(0), '/');
+ }
+ if (path.length() > 0 && path.charAt(0) != '/') {
+ path = '/' + path;
+ }
+ try {
+ return new InputSource(new URL("file", null, path).toString());
+ } catch (java.net.MalformedURLException e) {
+ throw new HyphenationException("unexpected MalformedURLException");
+ }
+ }
+
+ protected String readToken(StringBuffer chars) {
+ String word;
+ boolean space = false;
+ int i;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ } else {
+ break;
+ }
+ }
+ if (space) {
+ // chars.delete(0,i);
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (token.length() > 0) {
+ word = token.toString();
+ token.setLength(0);
+ return word;
+ }
+ }
+ space = false;
+ for (i = 0; i < chars.length(); i++) {
+ if (Character.isWhitespace(chars.charAt(i))) {
+ space = true;
+ break;
+ }
+ }
+ token.append(chars.toString().substring(0, i));
+ // chars.delete(0,i);
+ for (int countr = i; countr < chars.length(); countr++) {
+ chars.setCharAt(countr - i, chars.charAt(countr));
+ }
+ chars.setLength(chars.length() - i);
+ if (space) {
+ word = token.toString();
+ token.setLength(0);
+ return word;
+ }
+ token.append(chars);
+ return null;
+ }
+
+ protected static String getPattern(String word) {
+ StringBuffer pat = new StringBuffer();
+ int len = word.length();
+ for (int i = 0; i < len; i++) {
+ if (!Character.isDigit(word.charAt(i))) {
+ pat.append(word.charAt(i));
+ }
+ }
+ return pat.toString();
+ }
+
+ protected ArrayList normalizeException(ArrayList ex) {
+ ArrayList res = new ArrayList();
+ for (int i = 0; i < ex.size(); i++) {
+ Object item = ex.get(i);
+ if (item instanceof String) {
+ String str = (String)item;
+ StringBuffer buf = new StringBuffer();
+ for (int j = 0; j < str.length(); j++) {
+ char c = str.charAt(j);
+ if (c != hyphenChar) {
+ buf.append(c);
+ } else {
+ res.add(buf.toString());
+ buf.setLength(0);
+ char[] h = new char[1];
+ h[0] = hyphenChar;
+ // we use here hyphenChar which is not necessarily
+ // the one to be printed
+ res.add(new Hyphen(new String(h), null, null));
+ }
+ }
+ if (buf.length() > 0) {
+ res.add(buf.toString());
+ }
+ } else {
+ res.add(item);
+ }
+ }
+ return res;
+ }
+
+ protected String getExceptionWord(ArrayList ex) {
+ StringBuffer res = new StringBuffer();
+ for (int i = 0; i < ex.size(); i++) {
+ Object item = ex.get(i);
+ if (item instanceof String) {
+ res.append((String)item);
+ } else {
+ if (((Hyphen)item).noBreak != null) {
+ res.append(((Hyphen)item).noBreak);
+ }
+ }
+ }
+ return res.toString();
+ }
+
+ protected static String getInterletterValues(String pat) {
+ StringBuffer il = new StringBuffer();
+ String word = pat + "a"; // add dummy letter to serve as sentinel
+ int len = word.length();
+ for (int i = 0; i < len; i++) {
+ char c = word.charAt(i);
+ if (Character.isDigit(c)) {
+ il.append(c);
+ i++;
+ } else {
+ il.append('0');
+ }
+ }
+ return il.toString();
+ }
+
+ //
+ // DocumentHandler methods
+ //
+
+ /**
+ * Start element.
+ */
+ public void startElement(String uri, String local, String raw,
+ Attributes attrs) {
+ if (local.equals("hyphen-char")) {
+ String h = attrs.getValue("value");
+ if (h != null && h.length() == 1) {
+ hyphenChar = h.charAt(0);
+ }
+ } else if (local.equals("classes")) {
+ currElement = ELEM_CLASSES;
+ } else if (local.equals("patterns")) {
+ currElement = ELEM_PATTERNS;
+ } else if (local.equals("exceptions")) {
+ currElement = ELEM_EXCEPTIONS;
+ exception = new ArrayList();
+ } else if (local.equals("hyphen")) {
+ if (token.length() > 0) {
+ exception.add(token.toString());
+ }
+ exception.add(new Hyphen(attrs.getValue("pre"),
+ attrs.getValue("no"),
+ attrs.getValue("post")));
+ currElement = ELEM_HYPHEN;
+ }
+ token.setLength(0);
+ }
+
+ public void endElement(String uri, String local, String raw) {
+
+ if (token.length() > 0) {
+ String word = token.toString();
+ switch (currElement) {
+ case ELEM_CLASSES:
+ consumer.addClass(word);
+ break;
+ case ELEM_EXCEPTIONS:
+ exception.add(word);
+ exception = normalizeException(exception);
+ consumer.addException(getExceptionWord(exception),
+ (ArrayList)exception.clone());
+ break;
+ case ELEM_PATTERNS:
+ consumer.addPattern(getPattern(word),
+ getInterletterValues(word));
+ break;
+ case ELEM_HYPHEN:
+ // nothing to do
+ break;
+ }
+ if (currElement != ELEM_HYPHEN) {
+ token.setLength(0);
+ }
+ }
+ if (currElement == ELEM_HYPHEN) {
+ currElement = ELEM_EXCEPTIONS;
+ } else {
+ currElement = 0;
+ }
+
+ }
+
+ /**
+ * Characters.
+ */
+ public void characters(char ch[], int start, int length) {
+ StringBuffer chars = new StringBuffer(length);
+ chars.append(ch, start, length);
+ String word = readToken(chars);
+ while (word != null) {
+ // System.out.println("\"" + word + "\"");
+ switch (currElement) {
+ case ELEM_CLASSES:
+ consumer.addClass(word);
+ break;
+ case ELEM_EXCEPTIONS:
+ exception.add(word);
+ exception = normalizeException(exception);
+ consumer.addException(getExceptionWord(exception),
+ (ArrayList)exception.clone());
+ exception.clear();
+ break;
+ case ELEM_PATTERNS:
+ consumer.addPattern(getPattern(word),
+ getInterletterValues(word));
+ break;
+ }
+ word = readToken(chars);
+ }
+
+ }
+
+ //
+ // ErrorHandler methods
+ //
+
+ /**
+ * Warning.
+ */
+ public void warning(SAXParseException ex) {
+ errMsg = "[Warning] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ }
+
+ /**
+ * Error.
+ */
+ public void error(SAXParseException ex) {
+ errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
+ }
+
+ /**
+ * Fatal error.
+ */
+ public void fatalError(SAXParseException ex) throws SAXException {
+ errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
+ + ex.getMessage();
+ throw ex;
+ }
+
+ /**
+ * Returns a string of the location.
+ */
+ private String getLocationString(SAXParseException ex) {
+ StringBuffer str = new StringBuffer();
+
+ String systemId = ex.getSystemId();
+ if (systemId != null) {
+ int index = systemId.lastIndexOf('/');
+ if (index != -1) {
+ systemId = systemId.substring(index + 1);
+ }
+ str.append(systemId);
+ }
+ str.append(':');
+ str.append(ex.getLineNumber());
+ str.append(':');
+ str.append(ex.getColumnNumber());
+
+ return str.toString();
+
+ } // getLocationString(SAXParseException):String
+
+
+ // PatternConsumer implementation for testing purposes
+ public void addClass(String c) {
+ System.out.println("class: " + c);
+ }
+
+ public void addException(String w, ArrayList e) {
+ System.out.println("exception: " + w + " : " + e.toString());
+ }
+
+ public void addPattern(String p, String v) {
+ System.out.println("pattern: " + p + " : " + v);
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length > 0) {
+ PatternParser pp = new PatternParser();
+ pp.setConsumer(pp);
+ pp.parse(args[0]);
+ }
+ }
+
+}