You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

Word2Forrest.java 5.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.examples.hwpf;
  16. import java.io.FileInputStream;
  17. import java.io.FileOutputStream;
  18. import java.io.IOException;
  19. import java.io.InputStream;
  20. import java.io.OutputStream;
  21. import java.io.OutputStreamWriter;
  22. import java.io.Writer;
  23. import java.nio.charset.StandardCharsets;
  24. import org.apache.poi.hwpf.HWPFDocument;
  25. import org.apache.poi.hwpf.model.StyleDescription;
  26. import org.apache.poi.hwpf.model.StyleSheet;
  27. import org.apache.poi.hwpf.usermodel.CharacterRun;
  28. import org.apache.poi.hwpf.usermodel.Paragraph;
  29. import org.apache.poi.hwpf.usermodel.Range;
  30. @SuppressWarnings({"java:S106","java:S4823"})
  31. public final class Word2Forrest
  32. {
  33. Writer _out;
  34. HWPFDocument _doc;
  35. @SuppressWarnings("unused")
  36. public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException
  37. {
  38. _out = new OutputStreamWriter (stream, StandardCharsets.UTF_8);
  39. _doc = doc;
  40. init ();
  41. openDocument ();
  42. openBody ();
  43. Range r = doc.getRange ();
  44. StyleSheet styleSheet = doc.getStyleSheet ();
  45. int sectionLevel = 0;
  46. int lenParagraph = r.numParagraphs ();
  47. boolean inCode = false;
  48. for (int x = 0; x < lenParagraph; x++)
  49. {
  50. Paragraph p = r.getParagraph (x);
  51. String text = p.text ();
  52. if (text.trim ().length () == 0)
  53. {
  54. continue;
  55. }
  56. StyleDescription paragraphStyle = styleSheet.getStyleDescription (p.
  57. getStyleIndex ());
  58. String styleName = paragraphStyle.getName();
  59. if (styleName.startsWith ("Heading"))
  60. {
  61. if (inCode)
  62. {
  63. closeSource();
  64. inCode = false;
  65. }
  66. int headerLevel = Integer.parseInt (styleName.substring (8));
  67. if (headerLevel > sectionLevel)
  68. {
  69. openSection ();
  70. }
  71. else
  72. {
  73. for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++)
  74. {
  75. closeSection ();
  76. }
  77. openSection ();
  78. }
  79. sectionLevel = headerLevel;
  80. openTitle ();
  81. writePlainText (text);
  82. closeTitle ();
  83. }
  84. else
  85. {
  86. int cruns = p.numCharacterRuns ();
  87. CharacterRun run = p.getCharacterRun (0);
  88. String fontName = run.getFontName();
  89. if (fontName.startsWith ("Courier"))
  90. {
  91. if (!inCode)
  92. {
  93. openSource ();
  94. inCode = true;
  95. }
  96. writePlainText (p.text());
  97. }
  98. else
  99. {
  100. if (inCode)
  101. {
  102. inCode = false;
  103. closeSource();
  104. }
  105. openParagraph();
  106. writePlainText(p.text());
  107. closeParagraph();
  108. }
  109. }
  110. }
  111. for (int x = 0; x < sectionLevel; x++)
  112. {
  113. closeSection();
  114. }
  115. closeBody();
  116. closeDocument();
  117. _out.flush();
  118. }
  119. public void init ()
  120. throws IOException
  121. {
  122. _out.write ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n");
  123. _out.write ("<!DOCTYPE document PUBLIC \"-//APACHE//DTD Documentation V1.1//EN\" \"./dtd/document-v11.dtd\">\r\n");
  124. }
  125. public void openDocument ()
  126. throws IOException
  127. {
  128. _out.write ("<document>\r\n");
  129. }
  130. public void closeDocument ()
  131. throws IOException
  132. {
  133. _out.write ("</document>\r\n");
  134. }
  135. public void openBody ()
  136. throws IOException
  137. {
  138. _out.write ("<body>\r\n");
  139. }
  140. public void closeBody ()
  141. throws IOException
  142. {
  143. _out.write ("</body>\r\n");
  144. }
  145. public void openSection ()
  146. throws IOException
  147. {
  148. _out.write ("<section>");
  149. }
  150. public void closeSection ()
  151. throws IOException
  152. {
  153. _out.write ("</section>");
  154. }
  155. public void openTitle ()
  156. throws IOException
  157. {
  158. _out.write ("<title>");
  159. }
  160. public void closeTitle ()
  161. throws IOException
  162. {
  163. _out.write ("</title>");
  164. }
  165. public void writePlainText (String text)
  166. throws IOException
  167. {
  168. _out.write (text);
  169. }
  170. public void openParagraph ()
  171. throws IOException
  172. {
  173. _out.write ("<p>");
  174. }
  175. public void closeParagraph ()
  176. throws IOException
  177. {
  178. _out.write ("</p>");
  179. }
  180. public void openSource ()
  181. throws IOException
  182. {
  183. _out.write ("<source><![CDATA[");
  184. }
  185. public void closeSource ()
  186. throws IOException
  187. {
  188. _out.write ("]]></source>");
  189. }
  190. public static void main(String[] args) throws IOException {
  191. try (InputStream is = new FileInputStream(args[0]);
  192. OutputStream out = new FileOutputStream("test.xml")) {
  193. new Word2Forrest(new HWPFDocument(is), out);
  194. }
  195. }
  196. }