You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Hyphenator.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.hyphenation;
  19. import java.io.BufferedInputStream;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.io.ObjectInputStream;
  23. import java.net.URISyntaxException;
  24. import java.util.Map;
  25. import org.xml.sax.InputSource;
  26. import org.apache.commons.io.IOUtils;
  27. import org.apache.commons.logging.Log;
  28. import org.apache.commons.logging.LogFactory;
  29. import org.apache.fop.ResourceEventProducer;
  30. import org.apache.fop.apps.io.InternalResourceResolver;
  31. import org.apache.fop.events.EventBroadcaster;
  32. /**
  33. * <p>This class is the main entry point to the hyphenation package.
  34. * You can use only the static methods or create an instance.</p>
  35. *
  36. * <p>This work was authored by Carlos Villegas (cav@uniscope.co.jp).</p>
  37. */
  38. public final class Hyphenator {
  39. /** logging instance */
  40. private static final Log log = LogFactory.getLog(Hyphenator.class);
  41. private static HyphenationTreeCache hTreeCache;
  42. /** Enables a dump of statistics. Note: If activated content is sent to System.out! */
  43. private static boolean statisticsDump;
  44. public static final String HYPTYPE = Hyphenator.class.toString() + "HYP";
  45. public static final String XMLTYPE = Hyphenator.class.toString() + "XML";
  46. /**
  47. * Creates a new hyphenator.
  48. */
  49. private Hyphenator() { }
  50. /** @return the default (static) hyphenation tree cache */
  51. public static synchronized HyphenationTreeCache getHyphenationTreeCache() {
  52. if (hTreeCache == null) {
  53. hTreeCache = new HyphenationTreeCache();
  54. }
  55. return hTreeCache;
  56. }
  57. /**
  58. * Clears the default hyphenation tree cache.<br>
  59. * This method can be used if the underlying data files are changed at runtime.
  60. */
  61. public static synchronized void clearHyphenationTreeCache() {
  62. hTreeCache = new HyphenationTreeCache();
  63. }
  64. /**
  65. * Returns a hyphenation tree for a given language and country,
  66. * with fallback from (lang,country) to (lang).
  67. * The hyphenation trees are cached.
  68. * @param lang the language
  69. * @param country the country (may be null or "none")
  70. * @param resolver resolver to find the hyphenation files
  71. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  72. * @return the hyphenation tree
  73. */
  74. public static HyphenationTree getHyphenationTree(String lang, String country,
  75. InternalResourceResolver resolver, Map hyphPatNames) {
  76. return getHyphenationTree(lang, country, resolver, hyphPatNames, null);
  77. }
  78. public static HyphenationTree getHyphenationTree(String lang, String country,
  79. InternalResourceResolver resourceResolver, Map hyphPatNames, EventBroadcaster eventBroadcaster) {
  80. String llccKey = HyphenationTreeCache.constructLlccKey(lang, country);
  81. HyphenationTreeCache cache = getHyphenationTreeCache();
  82. // If this hyphenation tree has been registered as missing, return immediately
  83. if (cache.isMissing(llccKey)) {
  84. return null;
  85. }
  86. HyphenationTree hTree;
  87. // first try to find it in the cache
  88. hTree = getHyphenationTreeCache().getHyphenationTree(lang, country);
  89. if (hTree != null) {
  90. return hTree;
  91. }
  92. String key = HyphenationTreeCache.constructUserKey(lang, country, hyphPatNames);
  93. if (key == null) {
  94. key = llccKey;
  95. }
  96. if (resourceResolver != null) {
  97. hTree = getUserHyphenationTree(key, resourceResolver);
  98. }
  99. if (hTree == null) {
  100. hTree = getFopHyphenationTree(key);
  101. }
  102. if (hTree == null && country != null && !country.equals("none")) {
  103. return getHyphenationTree(lang, null, resourceResolver, hyphPatNames, eventBroadcaster);
  104. }
  105. // put it into the pattern cache
  106. if (hTree != null) {
  107. cache.cache(llccKey, hTree);
  108. } else {
  109. if (eventBroadcaster == null) {
  110. log.error("Couldn't find hyphenation pattern " + llccKey);
  111. } else {
  112. ResourceEventProducer producer = ResourceEventProducer.Provider.get(eventBroadcaster);
  113. String name = key.replace(HYPTYPE, "").replace(XMLTYPE, "");
  114. producer.hyphenationNotFound(cache, name);
  115. }
  116. cache.noteMissing(llccKey);
  117. }
  118. return hTree;
  119. }
  120. private static InputStream getResourceStream(String key) {
  121. InputStream is = null;
  122. // Try to use Context Class Loader to load the properties file.
  123. try {
  124. java.lang.reflect.Method getCCL = Thread.class.getMethod(
  125. "getContextClassLoader", new Class[0]);
  126. if (getCCL != null) {
  127. ClassLoader contextClassLoader = (ClassLoader)getCCL.invoke(
  128. Thread.currentThread(),
  129. new Object[0]);
  130. is = contextClassLoader.getResourceAsStream("hyph/" + key
  131. + ".hyp");
  132. }
  133. } catch (NoSuchMethodException e) {
  134. //ignore, fallback further down
  135. } catch (IllegalAccessException e) {
  136. //ignore, fallback further down
  137. } catch (java.lang.reflect.InvocationTargetException e) {
  138. //ignore, fallback further down
  139. }
  140. if (is == null) {
  141. is = Hyphenator.class.getResourceAsStream("/hyph/" + key
  142. + ".hyp");
  143. }
  144. return is;
  145. }
  146. private static HyphenationTree readHyphenationTree(InputStream in) {
  147. HyphenationTree hTree = null;
  148. try {
  149. ObjectInputStream ois = new ObjectInputStream(in);
  150. hTree = (HyphenationTree)ois.readObject();
  151. } catch (IOException ioe) {
  152. log.error("I/O error while loading precompiled hyphenation pattern file", ioe);
  153. } catch (ClassNotFoundException cnfe) {
  154. log.error("Error while reading hyphenation object from file", cnfe);
  155. }
  156. return hTree;
  157. }
  158. /**
  159. * Returns a hyphenation tree. This method looks in the resources (getResourceStream) for
  160. * the hyphenation patterns.
  161. * @param key the language/country key
  162. * @return the hyphenation tree or null if it wasn't found in the resources
  163. */
  164. public static HyphenationTree getFopHyphenationTree(String key) {
  165. HyphenationTree hTree = null;
  166. ObjectInputStream ois = null;
  167. InputStream is = null;
  168. try {
  169. is = getResourceStream(key);
  170. if (is == null) {
  171. if (log.isDebugEnabled()) {
  172. log.debug("Couldn't find precompiled hyphenation pattern "
  173. + key + " in resources");
  174. }
  175. return null;
  176. }
  177. hTree = readHyphenationTree(is);
  178. } finally {
  179. IOUtils.closeQuietly(ois);
  180. }
  181. return hTree;
  182. }
  183. /**
  184. * Load tree from serialized file or xml file
  185. * using configuration settings
  186. * @param key language key for the requested hyphenation file
  187. * @param resourceResolver resource resolver to find the hyphenation files
  188. * @return the requested HypenationTree or null if it is not available
  189. */
  190. public static HyphenationTree getUserHyphenationTree(String key,
  191. InternalResourceResolver resourceResolver) {
  192. HyphenationTree hTree = null;
  193. // I use here the following convention. The file name specified in
  194. // the configuration is taken as the base name. First we try
  195. // name + ".hyp" assuming a serialized HyphenationTree. If that fails
  196. // we try name + ".xml", assumming a raw hyphenation pattern file.
  197. // first try serialized object
  198. String name = key + ".hyp";
  199. if (key.endsWith(HYPTYPE)) {
  200. name = key.replace(HYPTYPE, "");
  201. }
  202. if (!key.endsWith(XMLTYPE)) {
  203. try {
  204. InputStream in = getHyphenationTreeStream(name, resourceResolver);
  205. try {
  206. hTree = readHyphenationTree(in);
  207. } finally {
  208. IOUtils.closeQuietly(in);
  209. }
  210. return hTree;
  211. } catch (IOException ioe) {
  212. if (log.isDebugEnabled()) {
  213. log.debug("I/O problem while trying to load " + name, ioe);
  214. }
  215. }
  216. }
  217. // try the raw XML file
  218. name = key + ".xml";
  219. if (key.endsWith(XMLTYPE)) {
  220. name = key.replace(XMLTYPE, "");
  221. }
  222. hTree = new HyphenationTree();
  223. try {
  224. InputStream in = getHyphenationTreeStream(name, resourceResolver);
  225. try {
  226. InputSource src = new InputSource(in);
  227. src.setSystemId(name);
  228. hTree.loadPatterns(src);
  229. } finally {
  230. IOUtils.closeQuietly(in);
  231. }
  232. if (statisticsDump) {
  233. System.out.println("Stats: ");
  234. hTree.printStats();
  235. }
  236. return hTree;
  237. } catch (HyphenationException ex) {
  238. log.error("Can't load user patterns from XML file " + name + ": " + ex.getMessage());
  239. return null;
  240. } catch (IOException ioe) {
  241. if (log.isDebugEnabled()) {
  242. log.debug("I/O problem while trying to load " + name, ioe);
  243. }
  244. return null;
  245. }
  246. }
  247. private static InputStream getHyphenationTreeStream(String name,
  248. InternalResourceResolver resourceResolver) throws IOException {
  249. try {
  250. return new BufferedInputStream(resourceResolver.getResource(name));
  251. } catch (URISyntaxException use) {
  252. log.debug("An exception was thrown while attempting to load " + name, use);
  253. }
  254. return null;
  255. }
  256. /**
  257. * Hyphenates a word.
  258. * @param lang the language
  259. * @param country the optional country code (may be null or "none")
  260. * @param resourceResolver resolver to find the hyphenation files
  261. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  262. * @param word the word to hyphenate
  263. * @param leftMin the minimum number of characters before the hyphenation point
  264. * @param rightMin the minimum number of characters after the hyphenation point
  265. * @return the hyphenation result
  266. */
  267. public static Hyphenation hyphenate(String lang, String country,
  268. InternalResourceResolver resourceResolver, Map hyphPatNames, String word, int leftMin,
  269. int rightMin) {
  270. return hyphenate(lang, country, resourceResolver, hyphPatNames, word, leftMin, rightMin, null);
  271. }
  272. public static Hyphenation hyphenate(String lang, String country, InternalResourceResolver resourceResolver,
  273. Map hyphPatNames, String word, int leftMin, int rightMin,
  274. EventBroadcaster eventBroadcaster) {
  275. HyphenationTree hTree = getHyphenationTree(lang, country, resourceResolver, hyphPatNames, eventBroadcaster);
  276. if (hTree == null) {
  277. return null;
  278. }
  279. return hTree.hyphenate(word, leftMin, rightMin);
  280. }
  281. }