You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PatternParser.java 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. * Copyright 1999-2004,2006 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /* $Id$ */
  17. package org.apache.fop.hyphenation;
  18. // SAX
  19. import org.xml.sax.XMLReader;
  20. import org.xml.sax.InputSource;
  21. import org.xml.sax.SAXException;
  22. import org.xml.sax.SAXParseException;
  23. import org.xml.sax.helpers.DefaultHandler;
  24. import org.xml.sax.Attributes;
  25. // Java
  26. import java.io.File;
  27. import java.io.FileNotFoundException;
  28. import java.io.IOException;
  29. import java.net.MalformedURLException;
  30. import java.util.ArrayList;
  31. import javax.xml.parsers.SAXParserFactory;
  32. /**
  33. * A SAX document handler to read and parse hyphenation patterns
  34. * from a XML file.
  35. *
  36. * @author Carlos Villegas <cav@uniscope.co.jp>
  37. */
  38. public class PatternParser extends DefaultHandler implements PatternConsumer {
  39. XMLReader parser;
  40. int currElement;
  41. PatternConsumer consumer;
  42. StringBuffer token;
  43. ArrayList exception;
  44. char hyphenChar;
  45. String errMsg;
  46. static final int ELEM_CLASSES = 1;
  47. static final int ELEM_EXCEPTIONS = 2;
  48. static final int ELEM_PATTERNS = 3;
  49. static final int ELEM_HYPHEN = 4;
  50. public PatternParser() throws HyphenationException {
  51. token = new StringBuffer();
  52. parser = createParser();
  53. parser.setContentHandler(this);
  54. parser.setErrorHandler(this);
  55. hyphenChar = '-'; // default
  56. }
  57. public PatternParser(PatternConsumer consumer)
  58. throws HyphenationException {
  59. this();
  60. this.consumer = consumer;
  61. }
  62. public void setConsumer(PatternConsumer consumer) {
  63. this.consumer = consumer;
  64. }
  65. /**
  66. * Parses a hyphenation pattern file.
  67. * @param filename the filename
  68. * @throws HyphenationException In case of an exception while parsing
  69. */
  70. public void parse(String filename) throws HyphenationException {
  71. parse(new File(filename));
  72. }
  73. /**
  74. * Parses a hyphenation pattern file.
  75. * @param file the pattern file
  76. * @throws HyphenationException In case of an exception while parsing
  77. */
  78. public void parse(File file) throws HyphenationException {
  79. try {
  80. InputSource src = new InputSource(file.toURL().toExternalForm());
  81. parse(src);
  82. } catch (MalformedURLException e) {
  83. throw new HyphenationException("Error converting the File '" + file + "' to a URL: "
  84. + e.getMessage());
  85. }
  86. }
  87. /**
  88. * Parses a hyphenation pattern file.
  89. * @param source the InputSource for the file
  90. * @throws HyphenationException In case of an exception while parsing
  91. */
  92. public void parse(InputSource source) throws HyphenationException {
  93. try {
  94. parser.parse(source);
  95. } catch (FileNotFoundException fnfe) {
  96. throw new HyphenationException("File not found: " + fnfe.getMessage());
  97. } catch (IOException ioe) {
  98. throw new HyphenationException(ioe.getMessage());
  99. } catch (SAXException e) {
  100. throw new HyphenationException(errMsg);
  101. }
  102. }
  103. /**
  104. * Creates a SAX parser using JAXP
  105. * @return the created SAX parser
  106. */
  107. static XMLReader createParser() {
  108. try {
  109. SAXParserFactory factory = SAXParserFactory.newInstance();
  110. factory.setNamespaceAware(true);
  111. return factory.newSAXParser().getXMLReader();
  112. } catch (Exception e) {
  113. throw new RuntimeException("Couldn't create XMLReader: " + e.getMessage());
  114. }
  115. }
  116. protected String readToken(StringBuffer chars) {
  117. String word;
  118. boolean space = false;
  119. int i;
  120. for (i = 0; i < chars.length(); i++) {
  121. if (Character.isWhitespace(chars.charAt(i))) {
  122. space = true;
  123. } else {
  124. break;
  125. }
  126. }
  127. if (space) {
  128. // chars.delete(0,i);
  129. for (int countr = i; countr < chars.length(); countr++) {
  130. chars.setCharAt(countr - i, chars.charAt(countr));
  131. }
  132. chars.setLength(chars.length() - i);
  133. if (token.length() > 0) {
  134. word = token.toString();
  135. token.setLength(0);
  136. return word;
  137. }
  138. }
  139. space = false;
  140. for (i = 0; i < chars.length(); i++) {
  141. if (Character.isWhitespace(chars.charAt(i))) {
  142. space = true;
  143. break;
  144. }
  145. }
  146. token.append(chars.toString().substring(0, i));
  147. // chars.delete(0,i);
  148. for (int countr = i; countr < chars.length(); countr++) {
  149. chars.setCharAt(countr - i, chars.charAt(countr));
  150. }
  151. chars.setLength(chars.length() - i);
  152. if (space) {
  153. word = token.toString();
  154. token.setLength(0);
  155. return word;
  156. }
  157. token.append(chars);
  158. return null;
  159. }
  160. protected static String getPattern(String word) {
  161. StringBuffer pat = new StringBuffer();
  162. int len = word.length();
  163. for (int i = 0; i < len; i++) {
  164. if (!Character.isDigit(word.charAt(i))) {
  165. pat.append(word.charAt(i));
  166. }
  167. }
  168. return pat.toString();
  169. }
  170. protected ArrayList normalizeException(ArrayList ex) {
  171. ArrayList res = new ArrayList();
  172. for (int i = 0; i < ex.size(); i++) {
  173. Object item = ex.get(i);
  174. if (item instanceof String) {
  175. String str = (String)item;
  176. StringBuffer buf = new StringBuffer();
  177. for (int j = 0; j < str.length(); j++) {
  178. char c = str.charAt(j);
  179. if (c != hyphenChar) {
  180. buf.append(c);
  181. } else {
  182. res.add(buf.toString());
  183. buf.setLength(0);
  184. char[] h = new char[1];
  185. h[0] = hyphenChar;
  186. // we use here hyphenChar which is not necessarily
  187. // the one to be printed
  188. res.add(new Hyphen(new String(h), null, null));
  189. }
  190. }
  191. if (buf.length() > 0) {
  192. res.add(buf.toString());
  193. }
  194. } else {
  195. res.add(item);
  196. }
  197. }
  198. return res;
  199. }
  200. protected String getExceptionWord(ArrayList ex) {
  201. StringBuffer res = new StringBuffer();
  202. for (int i = 0; i < ex.size(); i++) {
  203. Object item = ex.get(i);
  204. if (item instanceof String) {
  205. res.append((String)item);
  206. } else {
  207. if (((Hyphen)item).noBreak != null) {
  208. res.append(((Hyphen)item).noBreak);
  209. }
  210. }
  211. }
  212. return res.toString();
  213. }
  214. protected static String getInterletterValues(String pat) {
  215. StringBuffer il = new StringBuffer();
  216. String word = pat + "a"; // add dummy letter to serve as sentinel
  217. int len = word.length();
  218. for (int i = 0; i < len; i++) {
  219. char c = word.charAt(i);
  220. if (Character.isDigit(c)) {
  221. il.append(c);
  222. i++;
  223. } else {
  224. il.append('0');
  225. }
  226. }
  227. return il.toString();
  228. }
  229. //
  230. // ContentHandler methods
  231. //
  232. /**
  233. * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
  234. */
  235. public void startElement(String uri, String local, String raw,
  236. Attributes attrs) {
  237. if (local.equals("hyphen-char")) {
  238. String h = attrs.getValue("value");
  239. if (h != null && h.length() == 1) {
  240. hyphenChar = h.charAt(0);
  241. }
  242. } else if (local.equals("classes")) {
  243. currElement = ELEM_CLASSES;
  244. } else if (local.equals("patterns")) {
  245. currElement = ELEM_PATTERNS;
  246. } else if (local.equals("exceptions")) {
  247. currElement = ELEM_EXCEPTIONS;
  248. exception = new ArrayList();
  249. } else if (local.equals("hyphen")) {
  250. if (token.length() > 0) {
  251. exception.add(token.toString());
  252. }
  253. exception.add(new Hyphen(attrs.getValue("pre"),
  254. attrs.getValue("no"),
  255. attrs.getValue("post")));
  256. currElement = ELEM_HYPHEN;
  257. }
  258. token.setLength(0);
  259. }
  260. /**
  261. * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
  262. */
  263. public void endElement(String uri, String local, String raw) {
  264. if (token.length() > 0) {
  265. String word = token.toString();
  266. switch (currElement) {
  267. case ELEM_CLASSES:
  268. consumer.addClass(word);
  269. break;
  270. case ELEM_EXCEPTIONS:
  271. exception.add(word);
  272. exception = normalizeException(exception);
  273. consumer.addException(getExceptionWord(exception),
  274. (ArrayList)exception.clone());
  275. break;
  276. case ELEM_PATTERNS:
  277. consumer.addPattern(getPattern(word),
  278. getInterletterValues(word));
  279. break;
  280. case ELEM_HYPHEN:
  281. // nothing to do
  282. break;
  283. }
  284. if (currElement != ELEM_HYPHEN) {
  285. token.setLength(0);
  286. }
  287. }
  288. if (currElement == ELEM_HYPHEN) {
  289. currElement = ELEM_EXCEPTIONS;
  290. } else {
  291. currElement = 0;
  292. }
  293. }
  294. /**
  295. * @see org.xml.sax.ContentHandler#characters(char[], int, int)
  296. */
  297. public void characters(char ch[], int start, int length) {
  298. StringBuffer chars = new StringBuffer(length);
  299. chars.append(ch, start, length);
  300. String word = readToken(chars);
  301. while (word != null) {
  302. // System.out.println("\"" + word + "\"");
  303. switch (currElement) {
  304. case ELEM_CLASSES:
  305. consumer.addClass(word);
  306. break;
  307. case ELEM_EXCEPTIONS:
  308. exception.add(word);
  309. exception = normalizeException(exception);
  310. consumer.addException(getExceptionWord(exception),
  311. (ArrayList)exception.clone());
  312. exception.clear();
  313. break;
  314. case ELEM_PATTERNS:
  315. consumer.addPattern(getPattern(word),
  316. getInterletterValues(word));
  317. break;
  318. }
  319. word = readToken(chars);
  320. }
  321. }
  322. //
  323. // ErrorHandler methods
  324. //
  325. /**
  326. * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
  327. */
  328. public void warning(SAXParseException ex) {
  329. errMsg = "[Warning] " + getLocationString(ex) + ": "
  330. + ex.getMessage();
  331. }
  332. /**
  333. * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
  334. */
  335. public void error(SAXParseException ex) {
  336. errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
  337. }
  338. /**
  339. * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
  340. */
  341. public void fatalError(SAXParseException ex) throws SAXException {
  342. errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
  343. + ex.getMessage();
  344. throw ex;
  345. }
  346. /**
  347. * Returns a string of the location.
  348. */
  349. private String getLocationString(SAXParseException ex) {
  350. StringBuffer str = new StringBuffer();
  351. String systemId = ex.getSystemId();
  352. if (systemId != null) {
  353. int index = systemId.lastIndexOf('/');
  354. if (index != -1) {
  355. systemId = systemId.substring(index + 1);
  356. }
  357. str.append(systemId);
  358. }
  359. str.append(':');
  360. str.append(ex.getLineNumber());
  361. str.append(':');
  362. str.append(ex.getColumnNumber());
  363. return str.toString();
  364. } // getLocationString(SAXParseException):String
  365. // PatternConsumer implementation for testing purposes
  366. public void addClass(String c) {
  367. System.out.println("class: " + c);
  368. }
  369. public void addException(String w, ArrayList e) {
  370. System.out.println("exception: " + w + " : " + e.toString());
  371. }
  372. public void addPattern(String p, String v) {
  373. System.out.println("pattern: " + p + " : " + v);
  374. }
  375. public static void main(String[] args) throws Exception {
  376. if (args.length > 0) {
  377. PatternParser pp = new PatternParser();
  378. pp.setConsumer(pp);
  379. pp.parse(args[0]);
  380. }
  381. }
  382. }