You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

HyphenationTreeAnalysis.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.hyphenation;
  19. import java.io.BufferedReader;
  20. import java.io.FileInputStream;
  21. import java.io.FileOutputStream;
  22. import java.io.FileReader;
  23. import java.io.IOException;
  24. import java.io.InputStream;
  25. import java.io.ObjectInputStream;
  26. import java.io.ObjectOutputStream;
  27. import java.util.ArrayList;
  28. import java.util.List;
  29. import java.util.zip.ZipFile;
  30. import java.util.zip.ZipEntry;
  31. /**
  32. * This class provides some useful methods to print the structure of a HyphenationTree object
  33. */
  34. public class HyphenationTreeAnalysis extends TernaryTreeAnalysis {
  /**
   * The HyphenationTree object to analyse. It is the same object that is
   * handed to the superclass constructor; the reference is kept here with its
   * HyphenationTree type so that this class can read the tree's
   * <code>eq</code>, <code>sc</code>, <code>vspace</code> and
   * <code>classmap</code> members.
   */
  protected HyphenationTree ht;

  /**
   * Creates an analysis object for the given hyphenation tree. The tree is
   * passed to the superclass constructor and additionally stored in
   * {@link #ht}.
   * @param ht the HyphenationTree object
   */
  public HyphenationTreeAnalysis(HyphenationTree ht) {
    super(ht);
    this.ht = ht;
  }
  46. /**
  47. * Class representing a node of the HyphenationTree object
  48. */
  49. protected class Node extends TernaryTreeAnalysis.Node {
  50. private String value = null;
  51. /**
  52. * @param index the index of the node
  53. */
  54. protected Node(int index) {
  55. super(index);
  56. if (isLeafNode) {
  57. value = readValue().toString();
  58. }
  59. }
  60. private StringBuffer readValue() {
  61. StringBuffer s = new StringBuffer();
  62. int i = (int) ht.eq[index];
  63. byte v = ht.vspace.get(i);
  64. for (; v != 0; v = ht.vspace.get(++i)) {
  65. int c = (int) ((v >>> 4) - 1);
  66. s.append(c);
  67. c = (int) (v & 0x0f);
  68. if (c == 0) {
  69. break;
  70. }
  71. c = (c - 1);
  72. s.append(c);
  73. }
  74. return s;
  75. }
  76. /* (non-Javadoc)
  77. * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toNodeString()
  78. */
  79. public String toNodeString() {
  80. if (isLeafNode) {
  81. StringBuffer s = new StringBuffer();
  82. s.append("-" + index);
  83. if (isPacked) {
  84. s.append(",=>'" + key + "'");
  85. }
  86. s.append("," + value);
  87. s.append(",leaf");
  88. return s.toString();
  89. } else {
  90. return super.toNodeString();
  91. }
  92. }
  93. /* (non-Javadoc)
  94. * @see org.apache.fop.hyphenation.TernaryTreeAnalysis.Node#toCompactString()
  95. */
  96. public String toCompactString() {
  97. if (isLeafNode) {
  98. StringBuffer s = new StringBuffer();
  99. s.append("-" + index);
  100. if (isPacked) {
  101. s.append(",=>'" + key + "'");
  102. }
  103. s.append("," + value);
  104. s.append(",leaf\n");
  105. return s.toString();
  106. } else {
  107. return super.toCompactString();
  108. }
  109. }
  110. /* (non-Javadoc)
  111. * @see java.lang.Object#toString()
  112. */
  113. public String toString() {
  114. StringBuffer s = new StringBuffer();
  115. s.append(super.toString());
  116. if (isLeafNode) {
  117. s.append("value: " + value + "\n");
  118. }
  119. return s.toString();
  120. }
  121. }
  122. private void addNode(int nodeIndex, List strings, NodeString ns) {
  123. int pos = ns.indent + ns.string.length() + 1;
  124. Node n = new Node(nodeIndex);
  125. ns.string.append(n.toNodeString());
  126. if (n.high != 0) {
  127. ns.high.add(new Integer(pos));
  128. NodeString highNs = new NodeString(pos);
  129. highNs.low.add(new Integer(pos));
  130. int index = strings.indexOf(ns);
  131. strings.add(index, highNs);
  132. addNode(n.high, strings, highNs);
  133. }
  134. if (n.low != 0) {
  135. ns.low.add(new Integer(pos));
  136. NodeString lowNs = new NodeString(pos);
  137. lowNs.high.add(new Integer(pos));
  138. int index = strings.indexOf(ns);
  139. strings.add(index + 1, lowNs);
  140. addNode(n.low, strings, lowNs);
  141. }
  142. if (!n.isLeafNode) {
  143. addNode(n.equal, strings, ns);
  144. }
  145. }
  146. /**
  147. * Construct the tree representation of a list of node strings
  148. * @param strings the list of node strings
  149. * @return the string representing the tree
  150. */
  151. public String toTree(List strings) {
  152. StringBuffer indentString = new StringBuffer();
  153. for (int j = indentString.length(); j < ((NodeString) strings.get(0)).indent; ++j) {
  154. indentString.append(' ');
  155. }
  156. StringBuffer tree = new StringBuffer();
  157. for (int i = 0; i < strings.size(); ++i) {
  158. NodeString ns = (NodeString) strings.get(i);
  159. if (indentString.length() > ns.indent) {
  160. indentString.setLength(ns.indent);
  161. } else {
  162. // should not happen
  163. for (int j = indentString.length(); j < ns.indent; ++j) {
  164. indentString.append(' ');
  165. }
  166. }
  167. tree.append(indentString);
  168. tree.append(ns.string + "\n");
  169. if (i + 1 == strings.size()) {
  170. continue;
  171. }
  172. for (int j = 0; j < ns.low.size(); ++j) {
  173. int pos = ((Integer) ns.low.get(j)).intValue();
  174. if (pos < indentString.length()) {
  175. indentString.setCharAt(pos, '|');
  176. } else {
  177. for (int k = indentString.length(); k < pos; ++k) {
  178. indentString.append(' ');
  179. }
  180. indentString.append('|');
  181. }
  182. }
  183. tree.append(indentString + "\n");
  184. }
  185. return tree.toString();
  186. }
  187. /**
  188. * Construct the tree representation of the HyphenationTree object
  189. * @return the string representing the tree
  190. */
  191. public String toTree() {
  192. List strings = new ArrayList();
  193. NodeString ns = new NodeString(0);
  194. strings.add(ns);
  195. addNode(1, strings, ns);
  196. return toTree(strings);
  197. }
  198. /**
  199. * Construct the compact node representation of the HyphenationTree object
  200. * @return the string representing the tree
  201. */
  202. public String toCompactNodes() {
  203. StringBuffer s = new StringBuffer();
  204. for (int i = 1; i < ht.sc.length; ++i) {
  205. if (i != 1) {
  206. s.append("\n");
  207. }
  208. s.append((new Node(i)).toCompactString());
  209. }
  210. return s.toString();
  211. }
  212. /**
  213. * Construct the node representation of the HyphenationTree object
  214. * @return the string representing the tree
  215. */
  216. public String toNodes() {
  217. StringBuffer s = new StringBuffer();
  218. for (int i = 1; i < ht.sc.length; ++i) {
  219. if (i != 1) {
  220. s.append("\n");
  221. }
  222. s.append((new Node(i)).toString());
  223. }
  224. return s.toString();
  225. }
  226. /**
  227. * Construct the printed representation of the HyphenationTree object
  228. * @return the string representing the tree
  229. */
  230. public String toString() {
  231. StringBuffer s = new StringBuffer();
  232. s.append("classes: \n");
  233. s.append((new TernaryTreeAnalysis(ht.classmap)).toString());
  234. s.append("\npatterns: \n");
  235. s.append(super.toString());
  236. s.append("vspace: ");
  237. for (int i = 0; i < ht.vspace.length(); ++i) {
  238. byte v = ht.vspace.get(i);
  239. if (v == 0) {
  240. s.append("--");
  241. } else {
  242. int c = (int) ((v >>> 4) - 1);
  243. s.append(c);
  244. c = (int) (v & 0x0f);
  245. if (c == 0) {
  246. s.append("-");
  247. } else {
  248. c = (c - 1);
  249. s.append(c);
  250. }
  251. }
  252. }
  253. s.append("\n");
  254. return s.toString();
  255. }
  256. /**
  257. * Provide interactive access to a HyphenationTree object and its representation methods
  258. * @param args the arguments
  259. */
  260. public static void main(String[] args) {
  261. HyphenationTree ht = null;
  262. HyphenationTreeAnalysis hta = null;
  263. int minCharCount = 2;
  264. BufferedReader in = new BufferedReader(new java.io.InputStreamReader(System.in));
  265. while (true) {
  266. System.out.print("l:\tload patterns from XML\n"
  267. + "L:\tload patterns from serialized object\n"
  268. + "s:\tset minimun character count\n"
  269. + "w:\twrite hyphenation tree to object file\n"
  270. + "p:\tprint hyphenation tree to stdout\n"
  271. + "n:\tprint hyphenation tree nodes to stdout\n"
  272. + "c:\tprint compact hyphenation tree nodes to stdout\n"
  273. + "t:\tprint tree representation of hyphenation tree to stdout\n"
  274. + "h:\thyphenate\n"
  275. + "f:\tfind pattern\n"
  276. + "b:\tbenchmark\n"
  277. + "q:\tquit\n\n"
  278. + "Command:");
  279. try {
  280. String token = in.readLine().trim();
  281. if (token.equals("f")) {
  282. System.out.print("Pattern: ");
  283. token = in.readLine().trim();
  284. System.out.println("Values: " + ht.findPattern(token));
  285. } else if (token.equals("s")) {
  286. System.out.print("Minimum value: ");
  287. token = in.readLine().trim();
  288. minCharCount = Integer.parseInt(token);
  289. } else if (token.equals("l")) {
  290. ht = new HyphenationTree();
  291. hta = new HyphenationTreeAnalysis(ht);
  292. System.out.print("XML file name: ");
  293. token = in.readLine().trim();
  294. try {
  295. ht.loadPatterns(token);
  296. } catch (HyphenationException e) {
  297. e.printStackTrace();
  298. }
  299. } else if (token.equals("L")) {
  300. ObjectInputStream ois = null;
  301. System.out.print("Object file name: ");
  302. token = in.readLine().trim();
  303. try {
  304. String[] parts = token.split(":");
  305. InputStream is = null;
  306. if (parts.length == 1) {
  307. is = new FileInputStream(token);
  308. } else if (parts.length == 2) {
  309. ZipFile jar = new ZipFile(parts[0]);
  310. ZipEntry entry = new ZipEntry(jar.getEntry(parts[1]));
  311. is = jar.getInputStream(entry);
  312. }
  313. ois = new ObjectInputStream(is);
  314. ht = (HyphenationTree) ois.readObject();
  315. hta = new HyphenationTreeAnalysis(ht);
  316. } catch (Exception e) {
  317. e.printStackTrace();
  318. } finally {
  319. if (ois != null) {
  320. try {
  321. ois.close();
  322. } catch (IOException e) {
  323. //ignore
  324. }
  325. }
  326. }
  327. } else if (token.equals("w")) {
  328. System.out.print("Object file name: ");
  329. token = in.readLine().trim();
  330. ObjectOutputStream oos = null;
  331. try {
  332. oos = new ObjectOutputStream(new FileOutputStream(token));
  333. oos.writeObject(ht);
  334. } catch (Exception e) {
  335. e.printStackTrace();
  336. } finally {
  337. if (oos != null) {
  338. try {
  339. oos.flush();
  340. } catch (IOException e) {
  341. //ignore
  342. }
  343. try {
  344. oos.close();
  345. } catch (IOException e) {
  346. //ignore
  347. }
  348. }
  349. }
  350. } else if (token.equals("p")) {
  351. System.out.print(hta);
  352. } else if (token.equals("n")) {
  353. System.out.print(hta.toNodes());
  354. } else if (token.equals("c")) {
  355. System.out.print(hta.toCompactNodes());
  356. } else if (token.equals("t")) {
  357. System.out.print(hta.toTree());
  358. } else if (token.equals("h")) {
  359. System.out.print("Word: ");
  360. token = in.readLine().trim();
  361. System.out.print("Hyphenation points: ");
  362. System.out.println(ht.hyphenate(token, minCharCount,
  363. minCharCount));
  364. } else if (token.equals("b")) {
  365. if (ht == null) {
  366. System.out.println("No patterns have been loaded.");
  367. break;
  368. }
  369. System.out.print("Word list filename: ");
  370. token = in.readLine().trim();
  371. long starttime = 0;
  372. int counter = 0;
  373. try {
  374. BufferedReader reader = new BufferedReader(new FileReader(token));
  375. String line;
  376. starttime = System.currentTimeMillis();
  377. while ((line = reader.readLine()) != null) {
  378. // System.out.print("\nline: ");
  379. Hyphenation hyp = ht.hyphenate(line, minCharCount,
  380. minCharCount);
  381. if (hyp != null) {
  382. String hword = hyp.toString();
  383. // System.out.println(line);
  384. // System.out.println(hword);
  385. } else {
  386. // System.out.println("No hyphenation");
  387. }
  388. counter++;
  389. }
  390. } catch (Exception ioe) {
  391. System.out.println("Exception " + ioe);
  392. ioe.printStackTrace();
  393. }
  394. long endtime = System.currentTimeMillis();
  395. long result = endtime - starttime;
  396. System.out.println(counter + " words in " + result
  397. + " Milliseconds hyphenated");
  398. } else if (token.equals("q")) {
  399. break;
  400. }
  401. } catch (IOException e) {
  402. e.printStackTrace();
  403. }
  404. }
  405. }
  406. }