You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Hyphenator.java 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.hyphenation;
  19. import java.io.BufferedInputStream;
  20. import java.io.File;
  21. import java.io.IOException;
  22. import java.io.InputStream;
  23. import java.io.ObjectInputStream;
  24. import java.util.Map;
  25. import javax.xml.transform.Source;
  26. import javax.xml.transform.stream.StreamSource;
  27. import org.apache.commons.io.IOUtils;
  28. import org.apache.commons.logging.Log;
  29. import org.apache.commons.logging.LogFactory;
  30. import org.xml.sax.InputSource;
  31. /**
  32. * This class is the main entry point to the hyphenation package.
  33. * You can use only the static methods or create an instance.
  34. *
  35. * @author Carlos Villegas <cav@uniscope.co.jp>
  36. */
  37. public final class Hyphenator {
  38. /** logging instance */
  39. protected static final Log log = LogFactory.getLog(Hyphenator.class);
  40. private static HyphenationTreeCache hTreeCache = null;
  41. /** Enables a dump of statistics. Note: If activated content is sent to System.out! */
  42. private static boolean statisticsDump = false;
  43. /**
  44. * Creates a new hyphenator.
  45. */
  46. private Hyphenator() { }
  47. /** @return the default (static) hyphenation tree cache */
  48. public static synchronized HyphenationTreeCache getHyphenationTreeCache() {
  49. if (hTreeCache == null) {
  50. hTreeCache = new HyphenationTreeCache();
  51. }
  52. return hTreeCache;
  53. }
  54. /**
  55. * Returns a hyphenation tree for a given language and country,
  56. * with fallback from (lang,country) to (lang).
  57. * The hyphenation trees are cached.
  58. * @param lang the language
  59. * @param country the country (may be null or "none")
  60. * @param resolver resolver to find the hyphenation files
  61. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  62. * @return the hyphenation tree
  63. */
  64. public static HyphenationTree getHyphenationTree(String lang,
  65. String country, HyphenationTreeResolver resolver, Map hyphPatNames) {
  66. String llccKey = HyphenationTreeCache.constructLlccKey(lang, country);
  67. HyphenationTreeCache cache = getHyphenationTreeCache();
  68. // If this hyphenation tree has been registered as missing, return immediately
  69. if (cache.isMissing(llccKey)) {
  70. return null;
  71. }
  72. HyphenationTree hTree = getHyphenationTree2(lang, country, resolver, hyphPatNames);
  73. // fallback to lang only
  74. if (hTree == null && country != null && !country.equals("none")) {
  75. String llKey = HyphenationTreeCache.constructLlccKey(lang, null);
  76. if (!cache.isMissing(llKey)) {
  77. hTree = getHyphenationTree2(lang, null, resolver, hyphPatNames);
  78. if (hTree != null && log.isDebugEnabled()) {
  79. log.debug("Couldn't find hyphenation pattern "
  80. + "for lang=\"" + lang + "\",country=\"" + country + "\"."
  81. + " Using general language pattern "
  82. + "for lang=\"" + lang + "\" instead.");
  83. }
  84. if (hTree == null) {
  85. // no fallback; register as missing
  86. cache.noteMissing(llKey);
  87. } else {
  88. // also register for (lang,country)
  89. cache.cache(llccKey, hTree);
  90. }
  91. }
  92. }
  93. if (hTree == null) {
  94. // (lang,country) and (lang) tried; register as missing
  95. cache.noteMissing(llccKey);
  96. log.error("Couldn't find hyphenation pattern "
  97. + "for lang=\"" + lang + "\""
  98. + (country != null && !country.equals("none")
  99. ? ",country=\"" + country + "\""
  100. : "")
  101. + ".");
  102. }
  103. return hTree;
  104. }
  105. /**
  106. * Returns a hyphenation tree for a given language and country
  107. * The hyphenation trees are cached.
  108. * @param lang the language
  109. * @param country the country (may be null or "none")
  110. * @param resolver resolver to find the hyphenation files
  111. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  112. * @return the hyphenation tree
  113. */
  114. private static HyphenationTree getHyphenationTree2(String lang,
  115. String country, HyphenationTreeResolver resolver, Map hyphPatNames) {
  116. String llccKey = HyphenationTreeCache.constructLlccKey(lang, country);
  117. HyphenationTreeCache cache = getHyphenationTreeCache();
  118. HyphenationTree hTree;
  119. // first try to find it in the cache
  120. hTree = getHyphenationTreeCache().getHyphenationTree(lang, country);
  121. if (hTree != null) {
  122. return hTree;
  123. }
  124. String key = HyphenationTreeCache.constructUserKey(lang, country, hyphPatNames);
  125. if (key == null) {
  126. key = llccKey;
  127. }
  128. if (resolver != null) {
  129. hTree = getUserHyphenationTree(key, resolver);
  130. }
  131. if (hTree == null) {
  132. hTree = getFopHyphenationTree(key);
  133. }
  134. // put it into the pattern cache
  135. if (hTree != null) {
  136. cache.cache(llccKey, hTree);
  137. }
  138. return hTree;
  139. }
  140. private static InputStream getResourceStream(String key) {
  141. InputStream is = null;
  142. // Try to use Context Class Loader to load the properties file.
  143. try {
  144. java.lang.reflect.Method getCCL = Thread.class.getMethod(
  145. "getContextClassLoader", new Class[0]);
  146. if (getCCL != null) {
  147. ClassLoader contextClassLoader = (ClassLoader)getCCL.invoke(
  148. Thread.currentThread(),
  149. new Object[0]);
  150. is = contextClassLoader.getResourceAsStream("hyph/" + key
  151. + ".hyp");
  152. }
  153. } catch (Exception e) {
  154. //ignore, fallback further down
  155. }
  156. if (is == null) {
  157. is = Hyphenator.class.getResourceAsStream("/hyph/" + key
  158. + ".hyp");
  159. }
  160. return is;
  161. }
  162. private static HyphenationTree readHyphenationTree(InputStream in) {
  163. HyphenationTree hTree = null;
  164. try {
  165. ObjectInputStream ois = new ObjectInputStream(in);
  166. hTree = (HyphenationTree)ois.readObject();
  167. } catch (IOException ioe) {
  168. log.error("I/O error while loading precompiled hyphenation pattern file", ioe);
  169. } catch (ClassNotFoundException cnfe) {
  170. log.error("Error while reading hyphenation object from file", cnfe);
  171. }
  172. return hTree;
  173. }
  174. /**
  175. * Returns a hyphenation tree. This method looks in the resources (getResourceStream) for
  176. * the hyphenation patterns.
  177. * @param key the language/country key
  178. * @return the hyphenation tree or null if it wasn't found in the resources
  179. */
  180. public static HyphenationTree getFopHyphenationTree(String key) {
  181. HyphenationTree hTree = null;
  182. ObjectInputStream ois = null;
  183. InputStream is = null;
  184. try {
  185. is = getResourceStream(key);
  186. if (is == null) {
  187. if (log.isDebugEnabled()) {
  188. log.debug("Couldn't find precompiled hyphenation pattern "
  189. + key + " in resources");
  190. }
  191. return null;
  192. }
  193. hTree = readHyphenationTree(is);
  194. } finally {
  195. IOUtils.closeQuietly(ois);
  196. }
  197. return hTree;
  198. }
  199. /**
  200. * Load tree from serialized file or xml file
  201. * using configuration settings
  202. * @param key language key for the requested hyphenation file
  203. * @param hyphenDir base directory to find hyphenation files in
  204. * @return the requested HypenationTree or null if it is not available
  205. */
  206. public static HyphenationTree getUserHyphenationTree(String key,
  207. String hyphenDir) {
  208. final File baseDir = new File(hyphenDir);
  209. HyphenationTreeResolver resolver = new HyphenationTreeResolver() {
  210. public Source resolve(String href) {
  211. File f = new File(baseDir, href);
  212. return new StreamSource(f);
  213. }
  214. };
  215. return getUserHyphenationTree(key, resolver);
  216. }
  217. /**
  218. * Load tree from serialized file or xml file
  219. * using configuration settings
  220. * @param key language key for the requested hyphenation file
  221. * @param resolver resolver to find the hyphenation files
  222. * @return the requested HypenationTree or null if it is not available
  223. */
  224. public static HyphenationTree getUserHyphenationTree(String key,
  225. HyphenationTreeResolver resolver) {
  226. HyphenationTree hTree = null;
  227. // I use here the following convention. The file name specified in
  228. // the configuration is taken as the base name. First we try
  229. // name + ".hyp" assuming a serialized HyphenationTree. If that fails
  230. // we try name + ".xml", assumming a raw hyphenation pattern file.
  231. // first try serialized object
  232. String name = key + ".hyp";
  233. Source source = resolver.resolve(name);
  234. if (source != null) {
  235. try {
  236. InputStream in = null;
  237. if (source instanceof StreamSource) {
  238. in = ((StreamSource) source).getInputStream();
  239. }
  240. if (in == null) {
  241. if (source.getSystemId() != null) {
  242. in = new java.net.URL(source.getSystemId()).openStream();
  243. } else {
  244. throw new UnsupportedOperationException
  245. ("Cannot load hyphenation pattern file"
  246. + " with the supplied Source object: " + source);
  247. }
  248. }
  249. in = new BufferedInputStream(in);
  250. try {
  251. hTree = readHyphenationTree(in);
  252. } finally {
  253. IOUtils.closeQuietly(in);
  254. }
  255. return hTree;
  256. } catch (IOException ioe) {
  257. if (log.isDebugEnabled()) {
  258. log.debug("I/O problem while trying to load " + name, ioe);
  259. }
  260. }
  261. }
  262. // try the raw XML file
  263. name = key + ".xml";
  264. source = resolver.resolve(name);
  265. if (source != null) {
  266. hTree = new HyphenationTree();
  267. try {
  268. InputStream in = null;
  269. if (source instanceof StreamSource) {
  270. in = ((StreamSource) source).getInputStream();
  271. }
  272. if (in == null) {
  273. if (source.getSystemId() != null) {
  274. in = new java.net.URL(source.getSystemId()).openStream();
  275. } else {
  276. throw new UnsupportedOperationException(
  277. "Cannot load hyphenation pattern file"
  278. + " with the supplied Source object: " + source);
  279. }
  280. }
  281. if (!(in instanceof BufferedInputStream)) {
  282. in = new BufferedInputStream(in);
  283. }
  284. try {
  285. InputSource src = new InputSource(in);
  286. src.setSystemId(source.getSystemId());
  287. hTree.loadPatterns(src);
  288. } finally {
  289. IOUtils.closeQuietly(in);
  290. }
  291. if (statisticsDump) {
  292. System.out.println("Stats: ");
  293. hTree.printStats();
  294. }
  295. return hTree;
  296. } catch (HyphenationException ex) {
  297. log.error("Can't load user patterns from XML file " + source.getSystemId()
  298. + ": " + ex.getMessage());
  299. return null;
  300. } catch (IOException ioe) {
  301. if (log.isDebugEnabled()) {
  302. log.debug("I/O problem while trying to load " + name, ioe);
  303. }
  304. return null;
  305. }
  306. } else {
  307. if (log.isDebugEnabled()) {
  308. log.debug("Could not load user hyphenation file for '" + key + "'.");
  309. }
  310. return null;
  311. }
  312. }
  313. /**
  314. * Hyphenates a word.
  315. * @param lang the language
  316. * @param country the optional country code (may be null or "none")
  317. * @param resolver resolver to find the hyphenation files
  318. * @param hyphPatNames the map with user-configured hyphenation pattern file names
  319. * @param word the word to hyphenate
  320. * @param leftMin the minimum number of characters before the hyphenation point
  321. * @param rightMin the minimum number of characters after the hyphenation point
  322. * @return the hyphenation result
  323. */
  324. public static Hyphenation hyphenate(String lang, String country,
  325. HyphenationTreeResolver resolver,
  326. Map hyphPatNames,
  327. String word,
  328. int leftMin, int rightMin) {
  329. HyphenationTree hTree = getHyphenationTree(lang, country, resolver, hyphPatNames);
  330. if (hTree == null) {
  331. return null;
  332. }
  333. return hTree.hyphenate(word, leftMin, rightMin);
  334. }
  335. }