You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Hyphenator.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.hyphenation;
  19. import java.io.BufferedInputStream;
  20. import java.io.IOException;
  21. import java.io.InputStream;
  22. import java.io.ObjectInputStream;
  23. import java.net.URISyntaxException;
  24. import java.util.Map;
  25. import org.xml.sax.InputSource;
  26. import org.apache.commons.io.IOUtils;
  27. import org.apache.commons.logging.Log;
  28. import org.apache.commons.logging.LogFactory;
  29. import org.apache.fop.apps.io.InternalResourceResolver;
  30. /**
  31. * <p>This class is the main entry point to the hyphenation package.
  32. * You can use only the static methods or create an instance.</p>
  33. *
  34. * <p>This work was authored by Carlos Villegas (cav@uniscope.co.jp).</p>
  35. */
  36. public final class Hyphenator {
  37. /** logging instance */
  38. protected static final Log log = LogFactory.getLog(Hyphenator.class);
  39. private static HyphenationTreeCache hTreeCache = null;
  40. /** Enables a dump of statistics. Note: If activated content is sent to System.out! */
  41. private static boolean statisticsDump = false;
  42. /**
  43. * Creates a new hyphenator.
  44. */
  45. private Hyphenator() { }
  46. /** @return the default (static) hyphenation tree cache */
  47. public static synchronized HyphenationTreeCache getHyphenationTreeCache() {
  48. if (hTreeCache == null) {
  49. hTreeCache = new HyphenationTreeCache();
  50. }
  51. return hTreeCache;
  52. }
  53. /**
  54. * Returns a hyphenation tree for a given language and country,
  55. * with fallback from (lang,country) to (lang).
  56. * The hyphenation trees are cached.
  57. * @param lang the language
  58. * @param country the country (may be null or "none")
  59. * @param resourceResolver resolver to find the hyphenation files
  60. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  61. * @return the hyphenation tree
  62. */
  63. public static HyphenationTree getHyphenationTree(String lang,
  64. String country, InternalResourceResolver resourceResolver, Map hyphPatNames) {
  65. String llccKey = HyphenationTreeCache.constructLlccKey(lang, country);
  66. HyphenationTreeCache cache = getHyphenationTreeCache();
  67. // If this hyphenation tree has been registered as missing, return immediately
  68. if (cache.isMissing(llccKey)) {
  69. return null;
  70. }
  71. HyphenationTree hTree = getHyphenationTree2(lang, country, resourceResolver, hyphPatNames);
  72. // fallback to lang only
  73. if (hTree == null && country != null && !country.equals("none")) {
  74. String llKey = HyphenationTreeCache.constructLlccKey(lang, null);
  75. if (!cache.isMissing(llKey)) {
  76. hTree = getHyphenationTree2(lang, null, resourceResolver, hyphPatNames);
  77. if (hTree != null && log.isDebugEnabled()) {
  78. log.debug("Couldn't find hyphenation pattern "
  79. + "for lang=\"" + lang + "\",country=\"" + country + "\"."
  80. + " Using general language pattern "
  81. + "for lang=\"" + lang + "\" instead.");
  82. }
  83. if (hTree == null) {
  84. // no fallback; register as missing
  85. cache.noteMissing(llKey);
  86. } else {
  87. // also register for (lang,country)
  88. cache.cache(llccKey, hTree);
  89. }
  90. }
  91. }
  92. if (hTree == null) {
  93. // (lang,country) and (lang) tried; register as missing
  94. cache.noteMissing(llccKey);
  95. log.error("Couldn't find hyphenation pattern "
  96. + "for lang=\"" + lang + "\""
  97. + (country != null && !country.equals("none")
  98. ? ",country=\"" + country + "\""
  99. : "")
  100. + ".");
  101. }
  102. return hTree;
  103. }
  104. /**
  105. * Returns a hyphenation tree for a given language and country
  106. * The hyphenation trees are cached.
  107. * @param lang the language
  108. * @param country the country (may be null or "none")
  109. * @param resourceResolver resource resolver to find the hyphenation files
  110. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  111. * @return the hyphenation tree
  112. */
  113. public static HyphenationTree getHyphenationTree2(String lang,
  114. String country, InternalResourceResolver resourceResolver, Map hyphPatNames) {
  115. String llccKey = HyphenationTreeCache.constructLlccKey(lang, country);
  116. HyphenationTreeCache cache = getHyphenationTreeCache();
  117. HyphenationTree hTree;
  118. // first try to find it in the cache
  119. hTree = getHyphenationTreeCache().getHyphenationTree(lang, country);
  120. if (hTree != null) {
  121. return hTree;
  122. }
  123. String key = HyphenationTreeCache.constructUserKey(lang, country, hyphPatNames);
  124. if (key == null) {
  125. key = llccKey;
  126. }
  127. if (resourceResolver != null) {
  128. hTree = getUserHyphenationTree(key, resourceResolver);
  129. }
  130. if (hTree == null) {
  131. hTree = getFopHyphenationTree(key);
  132. }
  133. // put it into the pattern cache
  134. if (hTree != null) {
  135. cache.cache(llccKey, hTree);
  136. }
  137. return hTree;
  138. }
  139. private static InputStream getResourceStream(String key) {
  140. InputStream is = null;
  141. // Try to use Context Class Loader to load the properties file.
  142. try {
  143. java.lang.reflect.Method getCCL = Thread.class.getMethod(
  144. "getContextClassLoader", new Class[0]);
  145. if (getCCL != null) {
  146. ClassLoader contextClassLoader = (ClassLoader)getCCL.invoke(
  147. Thread.currentThread(),
  148. new Object[0]);
  149. is = contextClassLoader.getResourceAsStream("hyph/" + key
  150. + ".hyp");
  151. }
  152. } catch (Exception e) {
  153. //ignore, fallback further down
  154. }
  155. if (is == null) {
  156. is = Hyphenator.class.getResourceAsStream("/hyph/" + key
  157. + ".hyp");
  158. }
  159. return is;
  160. }
  161. private static HyphenationTree readHyphenationTree(InputStream in) {
  162. HyphenationTree hTree = null;
  163. try {
  164. ObjectInputStream ois = new ObjectInputStream(in);
  165. hTree = (HyphenationTree)ois.readObject();
  166. } catch (IOException ioe) {
  167. log.error("I/O error while loading precompiled hyphenation pattern file", ioe);
  168. } catch (ClassNotFoundException cnfe) {
  169. log.error("Error while reading hyphenation object from file", cnfe);
  170. }
  171. return hTree;
  172. }
  173. /**
  174. * Returns a hyphenation tree. This method looks in the resources (getResourceStream) for
  175. * the hyphenation patterns.
  176. * @param key the language/country key
  177. * @return the hyphenation tree or null if it wasn't found in the resources
  178. */
  179. public static HyphenationTree getFopHyphenationTree(String key) {
  180. HyphenationTree hTree = null;
  181. ObjectInputStream ois = null;
  182. InputStream is = null;
  183. try {
  184. is = getResourceStream(key);
  185. if (is == null) {
  186. if (log.isDebugEnabled()) {
  187. log.debug("Couldn't find precompiled hyphenation pattern "
  188. + key + " in resources");
  189. }
  190. return null;
  191. }
  192. hTree = readHyphenationTree(is);
  193. } finally {
  194. IOUtils.closeQuietly(ois);
  195. }
  196. return hTree;
  197. }
  198. /**
  199. * Load tree from serialized file or xml file
  200. * using configuration settings
  201. * @param key language key for the requested hyphenation file
  202. * @param resourceResolver resource resolver to find the hyphenation files
  203. * @return the requested HypenationTree or null if it is not available
  204. */
  205. public static HyphenationTree getUserHyphenationTree(String key,
  206. InternalResourceResolver resourceResolver) {
  207. HyphenationTree hTree = null;
  208. // I use here the following convention. The file name specified in
  209. // the configuration is taken as the base name. First we try
  210. // name + ".hyp" assuming a serialized HyphenationTree. If that fails
  211. // we try name + ".xml", assumming a raw hyphenation pattern file.
  212. // first try serialized object
  213. String name = key + ".hyp";
  214. try {
  215. InputStream in = getHyphenationTreeStream(name, resourceResolver);
  216. try {
  217. hTree = readHyphenationTree(in);
  218. } finally {
  219. IOUtils.closeQuietly(in);
  220. }
  221. return hTree;
  222. } catch (IOException ioe) {
  223. if (log.isDebugEnabled()) {
  224. log.debug("I/O problem while trying to load " + name, ioe);
  225. }
  226. }
  227. // try the raw XML file
  228. name = key + ".xml";
  229. hTree = new HyphenationTree();
  230. try {
  231. InputStream in = getHyphenationTreeStream(name, resourceResolver);
  232. try {
  233. InputSource src = new InputSource(in);
  234. src.setSystemId(name);
  235. hTree.loadPatterns(src);
  236. } finally {
  237. IOUtils.closeQuietly(in);
  238. }
  239. if (statisticsDump) {
  240. System.out.println("Stats: ");
  241. hTree.printStats();
  242. }
  243. return hTree;
  244. } catch (HyphenationException ex) {
  245. log.error("Can't load user patterns from XML file " + name + ": " + ex.getMessage());
  246. return null;
  247. } catch (IOException ioe) {
  248. if (log.isDebugEnabled()) {
  249. log.debug("I/O problem while trying to load " + name, ioe);
  250. }
  251. return null;
  252. }
  253. }
  254. private static InputStream getHyphenationTreeStream(String name,
  255. InternalResourceResolver resourceResolver) throws IOException {
  256. try {
  257. return new BufferedInputStream(resourceResolver.getResource(name));
  258. } catch (URISyntaxException use) {
  259. log.debug("An exception was thrown while attempting to load " + name, use);
  260. }
  261. return null;
  262. }
  263. /**
  264. * Hyphenates a word.
  265. * @param lang the language
  266. * @param country the optional country code (may be null or "none")
  267. * @param resourceResolver resource resolver to find the hyphenation files
  268. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  269. * @param word the word to hyphenate
  270. * @param leftMin the minimum number of characters before the hyphenation point
  271. * @param rightMin the minimum number of characters after the hyphenation point
  272. * @return the hyphenation result
  273. */
  274. public static Hyphenation hyphenate(String lang, String country,
  275. InternalResourceResolver resourceResolver, Map hyphPatNames, String word, int leftMin,
  276. int rightMin) {
  277. HyphenationTree hTree = getHyphenationTree(lang, country, resourceResolver, hyphPatNames);
  278. if (hTree == null) {
  279. return null;
  280. }
  281. return hTree.hyphenate(word, leftMin, rightMin);
  282. }
  283. }