diff options
author | Glen Mazza <gmazza@apache.org> | 2004-09-06 18:03:12 +0000 |
---|---|---|
committer | Glen Mazza <gmazza@apache.org> | 2004-09-06 18:03:12 +0000 |
commit | 04e3d089d6faef74065f9de7fdbec0862f97669b (patch) | |
tree | aaec09825d8cea2eac0dd86ee239f1a52be71b47 /src/java/org/apache/fop/hyphenation/PatternParser.java | |
parent | a0831593665d120fcd6da96b0c33795e21531818 (diff) | |
download | xmlgraphics-fop-04e3d089d6faef74065f9de7fdbec0862f97669b.tar.gz xmlgraphics-fop-04e3d089d6faef74065f9de7fdbec0862f97669b.zip |
PR:
Obtained from:
Submitted by:
Reviewed by:
Moved hyphenation package to org.apache.fop.hyphenation
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@197909 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/fop/hyphenation/PatternParser.java')
-rw-r--r-- | src/java/org/apache/fop/hyphenation/PatternParser.java | 433 |
1 files changed, 433 insertions, 0 deletions
diff --git a/src/java/org/apache/fop/hyphenation/PatternParser.java b/src/java/org/apache/fop/hyphenation/PatternParser.java new file mode 100644 index 000000000..bde436f82 --- /dev/null +++ b/src/java/org/apache/fop/hyphenation/PatternParser.java @@ -0,0 +1,433 @@ +/* + * Copyright 1999-2004 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.hyphenation; + +// SAX +import org.xml.sax.XMLReader; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; +import org.xml.sax.Attributes; + +// Java +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.net.URL; + +/** + * A SAX document handler to read and parse hyphenation patterns + * from a XML file. + * + * @author Carlos Villegas <cav@uniscope.co.jp> + */ +public class PatternParser extends DefaultHandler implements PatternConsumer { + + XMLReader parser; + int currElement; + PatternConsumer consumer; + StringBuffer token; + ArrayList exception; + char hyphenChar; + String errMsg; + + static final int ELEM_CLASSES = 1; + static final int ELEM_EXCEPTIONS = 2; + static final int ELEM_PATTERNS = 3; + static final int ELEM_HYPHEN = 4; + + public PatternParser() throws HyphenationException { + token = new StringBuffer(); + parser = createParser(); + parser.setContentHandler(this); + parser.setErrorHandler(this); + hyphenChar = '-'; // default + + } + + public PatternParser(PatternConsumer consumer) + throws HyphenationException { + this(); + this.consumer = consumer; + } + + public void setConsumer(PatternConsumer consumer) { + this.consumer = consumer; + } + + public void parse(String filename) throws HyphenationException { + InputSource uri = fileInputSource(filename); + + try { + parser.parse(uri); + } catch (SAXException e) { + throw new HyphenationException(errMsg); + } catch (IOException e) { + throw new HyphenationException(e.getMessage()); + } catch (NullPointerException e) { + throw new HyphenationException("SAX parser not available"); + } + } + + /** + * creates a SAX parser, using the value of org.xml.sax.parser + * defaulting to org.apache.xerces.parsers.SAXParser + * + * @return the created SAX parser + */ + static XMLReader createParser() throws HyphenationException { + String parserClassName = System.getProperty("org.xml.sax.parser"); + if (parserClassName == null) { + parserClassName = "org.apache.xerces.parsers.SAXParser"; + } + // System.out.println("using SAX parser " + parserClassName); + + try { + return (XMLReader)Class.forName(parserClassName).newInstance(); + } catch (ClassNotFoundException e) { + throw new HyphenationException("Could not find " + + parserClassName); + } catch (InstantiationException e) { + throw new HyphenationException("Could not instantiate " + + parserClassName); + } catch (IllegalAccessException e) { + throw new HyphenationException("Could not access " + + parserClassName); + } catch (ClassCastException e) { + throw new HyphenationException(parserClassName + + " is not a SAX driver"); + } + } + + /** + * create an InputSource from a file name + * + * @param filename the name of the file + * @return the InputSource created + */ + protected static InputSource fileInputSource(String filename) + throws HyphenationException { + + /* this code adapted from James Clark's in XT */ + File file = new File(filename); + String path = file.getAbsolutePath(); + String fSep = System.getProperty("file.separator"); + if (fSep != null && fSep.length() == 1) { + path = path.replace(fSep.charAt(0), '/'); + } + if (path.length() > 0 && path.charAt(0) != '/') { + path = '/' + path; + } + try { + return new InputSource(new URL("file", null, path).toString()); + } catch (java.net.MalformedURLException e) { + throw new HyphenationException("unexpected MalformedURLException"); + } + } + + protected String readToken(StringBuffer chars) { + String word; + boolean space = false; + int i; + for (i = 0; i < chars.length(); i++) { + if (Character.isWhitespace(chars.charAt(i))) { + space = true; + } else { + break; + } + } + if (space) { + // chars.delete(0,i); + for (int countr = i; countr < chars.length(); countr++) { + chars.setCharAt(countr - i, chars.charAt(countr)); + } + chars.setLength(chars.length() - i); + if (token.length() > 0) { + word = token.toString(); + token.setLength(0); + return word; + } + } + space = false; + for (i = 0; i < chars.length(); i++) { + if (Character.isWhitespace(chars.charAt(i))) { + space = true; + break; + } + } + token.append(chars.toString().substring(0, i)); + // chars.delete(0,i); + for (int countr = i; countr < chars.length(); countr++) { + chars.setCharAt(countr - i, chars.charAt(countr)); + } + chars.setLength(chars.length() - i); + if (space) { + word = token.toString(); + token.setLength(0); + return word; + } + token.append(chars); + return null; + } + + protected static String getPattern(String word) { + StringBuffer pat = new StringBuffer(); + int len = word.length(); + for (int i = 0; i < len; i++) { + if (!Character.isDigit(word.charAt(i))) { + pat.append(word.charAt(i)); + } + } + return pat.toString(); + } + + protected ArrayList normalizeException(ArrayList ex) { + ArrayList res = new ArrayList(); + for (int i = 0; i < ex.size(); i++) { + Object item = ex.get(i); + if (item instanceof String) { + String str = (String)item; + StringBuffer buf = new StringBuffer(); + for (int j = 0; j < str.length(); j++) { + char c = str.charAt(j); + if (c != hyphenChar) { + buf.append(c); + } else { + res.add(buf.toString()); + buf.setLength(0); + char[] h = new char[1]; + h[0] = hyphenChar; + // we use here hyphenChar which is not necessarily + // the one to be printed + res.add(new Hyphen(new String(h), null, null)); + } + } + if (buf.length() > 0) { + res.add(buf.toString()); + } + } else { + res.add(item); + } + } + return res; + } + + protected String getExceptionWord(ArrayList ex) { + StringBuffer res = new StringBuffer(); + for (int i = 0; i < ex.size(); i++) { + Object item = ex.get(i); + if (item instanceof String) { + res.append((String)item); + } else { + if (((Hyphen)item).noBreak != null) { + res.append(((Hyphen)item).noBreak); + } + } + } + return res.toString(); + } + + protected static String getInterletterValues(String pat) { + StringBuffer il = new StringBuffer(); + String word = pat + "a"; // add dummy letter to serve as sentinel + int len = word.length(); + for (int i = 0; i < len; i++) { + char c = word.charAt(i); + if (Character.isDigit(c)) { + il.append(c); + i++; + } else { + il.append('0'); + } + } + return il.toString(); + } + + // + // DocumentHandler methods + // + + /** + * Start element. + */ + public void startElement(String uri, String local, String raw, + Attributes attrs) { + if (local.equals("hyphen-char")) { + String h = attrs.getValue("value"); + if (h != null && h.length() == 1) { + hyphenChar = h.charAt(0); + } + } else if (local.equals("classes")) { + currElement = ELEM_CLASSES; + } else if (local.equals("patterns")) { + currElement = ELEM_PATTERNS; + } else if (local.equals("exceptions")) { + currElement = ELEM_EXCEPTIONS; + exception = new ArrayList(); + } else if (local.equals("hyphen")) { + if (token.length() > 0) { + exception.add(token.toString()); + } + exception.add(new Hyphen(attrs.getValue("pre"), + attrs.getValue("no"), + attrs.getValue("post"))); + currElement = ELEM_HYPHEN; + } + token.setLength(0); + } + + public void endElement(String uri, String local, String raw) { + + if (token.length() > 0) { + String word = token.toString(); + switch (currElement) { + case ELEM_CLASSES: + consumer.addClass(word); + break; + case ELEM_EXCEPTIONS: + exception.add(word); + exception = normalizeException(exception); + consumer.addException(getExceptionWord(exception), + (ArrayList)exception.clone()); + break; + case ELEM_PATTERNS: + consumer.addPattern(getPattern(word), + getInterletterValues(word)); + break; + case ELEM_HYPHEN: + // nothing to do + break; + } + if (currElement != ELEM_HYPHEN) { + token.setLength(0); + } + } + if (currElement == ELEM_HYPHEN) { + currElement = ELEM_EXCEPTIONS; + } else { + currElement = 0; + } + + } + + /** + * Characters. + */ + public void characters(char ch[], int start, int length) { + StringBuffer chars = new StringBuffer(length); + chars.append(ch, start, length); + String word = readToken(chars); + while (word != null) { + // System.out.println("\"" + word + "\""); + switch (currElement) { + case ELEM_CLASSES: + consumer.addClass(word); + break; + case ELEM_EXCEPTIONS: + exception.add(word); + exception = normalizeException(exception); + consumer.addException(getExceptionWord(exception), + (ArrayList)exception.clone()); + exception.clear(); + break; + case ELEM_PATTERNS: + consumer.addPattern(getPattern(word), + getInterletterValues(word)); + break; + } + word = readToken(chars); + } + + } + + // + // ErrorHandler methods + // + + /** + * Warning. + */ + public void warning(SAXParseException ex) { + errMsg = "[Warning] " + getLocationString(ex) + ": " + + ex.getMessage(); + } + + /** + * Error. + */ + public void error(SAXParseException ex) { + errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage(); + } + + /** + * Fatal error. + */ + public void fatalError(SAXParseException ex) throws SAXException { + errMsg = "[Fatal Error] " + getLocationString(ex) + ": " + + ex.getMessage(); + throw ex; + } + + /** + * Returns a string of the location. + */ + private String getLocationString(SAXParseException ex) { + StringBuffer str = new StringBuffer(); + + String systemId = ex.getSystemId(); + if (systemId != null) { + int index = systemId.lastIndexOf('/'); + if (index != -1) { + systemId = systemId.substring(index + 1); + } + str.append(systemId); + } + str.append(':'); + str.append(ex.getLineNumber()); + str.append(':'); + str.append(ex.getColumnNumber()); + + return str.toString(); + + } // getLocationString(SAXParseException):String + + + // PatternConsumer implementation for testing purposes + public void addClass(String c) { + System.out.println("class: " + c); + } + + public void addException(String w, ArrayList e) { + System.out.println("exception: " + w + " : " + e.toString()); + } + + public void addPattern(String p, String v) { + System.out.println("pattern: " + p + " : " + v); + } + + public static void main(String[] args) throws Exception { + if (args.length > 0) { + PatternParser pp = new PatternParser(); + pp.setConsumer(pp); + pp.parse(args[0]); + } + } + +} |