You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

TestXWPFDocument.java 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.xwpf.usermodel;
  16. import static org.junit.Assert.assertEquals;
  17. import static org.junit.Assert.assertFalse;
  18. import static org.junit.Assert.assertNotNull;
  19. import static org.junit.Assert.assertSame;
  20. import static org.junit.Assert.assertTrue;
  21. import static org.junit.Assert.fail;
  22. import java.io.IOException;
  23. import java.io.OutputStream;
  24. import java.util.Arrays;
  25. import java.util.List;
  26. import org.apache.poi.POIDataSamples;
  27. import org.apache.poi.ooxml.POIXMLDocumentPart;
  28. import org.apache.poi.ooxml.POIXMLProperties;
  29. import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  30. import org.apache.poi.openxml4j.opc.OPCPackage;
  31. import org.apache.poi.openxml4j.opc.PackageAccess;
  32. import org.apache.poi.openxml4j.opc.PackagePart;
  33. import org.apache.poi.openxml4j.opc.PackagePartName;
  34. import org.apache.poi.openxml4j.opc.PackagingURIHelper;
  35. import org.apache.poi.xwpf.XWPFTestDataSamples;
  36. import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
  37. import org.apache.xmlbeans.XmlCursor;
  38. import org.junit.Ignore;
  39. import org.junit.Test;
  40. import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
  41. public final class TestXWPFDocument {
  42. @Test
  43. public void testContainsMainContentType() throws Exception {
  44. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
  45. OPCPackage pack = doc.getPackage();
  46. boolean found = false;
  47. for (PackagePart part : pack.getParts()) {
  48. if (part.getContentType().equals(XWPFRelation.DOCUMENT.getContentType())) {
  49. found = true;
  50. }
  51. // if (false) {
  52. // // successful tests should be silent
  53. // System.out.println(part);
  54. // }
  55. }
  56. assertTrue(found);
  57. pack.close();
  58. doc.close();
  59. }
  60. @Test
  61. public void testOpen() throws Exception {
  62. // Simple file
  63. try (XWPFDocument xml1 = XWPFTestDataSamples.openSampleDocument("sample.docx")) {
  64. // Check it has key parts
  65. assertNotNull(xml1.getDocument());
  66. assertNotNull(xml1.getDocument().getBody());
  67. assertNotNull(xml1.getStyle());
  68. }
  69. // Complex file
  70. try (XWPFDocument xml2 = XWPFTestDataSamples.openSampleDocument("IllustrativeCases.docx")) {
  71. assertNotNull(xml2.getDocument());
  72. assertNotNull(xml2.getDocument().getBody());
  73. assertNotNull(xml2.getStyle());
  74. }
  75. }
  76. @Test
  77. public void testMetadataBasics() throws IOException {
  78. try (XWPFDocument xml = XWPFTestDataSamples.openSampleDocument("sample.docx")) {
  79. assertNotNull(xml.getProperties().getCoreProperties());
  80. assertNotNull(xml.getProperties().getExtendedProperties());
  81. assertEquals("Microsoft Office Word", xml.getProperties().getExtendedProperties().getUnderlyingProperties().getApplication());
  82. assertEquals(1315, xml.getProperties().getExtendedProperties().getUnderlyingProperties().getCharacters());
  83. assertEquals(10, xml.getProperties().getExtendedProperties().getUnderlyingProperties().getLines());
  84. assertEquals(null, xml.getProperties().getCoreProperties().getTitle());
  85. assertFalse(xml.getProperties().getCoreProperties().getUnderlyingProperties().getSubjectProperty().isPresent());
  86. }
  87. }
  88. @Test
  89. public void testMetadataComplex() throws IOException {
  90. XWPFDocument xml = XWPFTestDataSamples.openSampleDocument("IllustrativeCases.docx");
  91. assertNotNull(xml.getProperties().getCoreProperties());
  92. assertNotNull(xml.getProperties().getExtendedProperties());
  93. assertEquals("Microsoft Office Outlook", xml.getProperties().getExtendedProperties().getUnderlyingProperties().getApplication());
  94. assertEquals(5184, xml.getProperties().getExtendedProperties().getUnderlyingProperties().getCharacters());
  95. assertEquals(0, xml.getProperties().getExtendedProperties().getUnderlyingProperties().getLines());
  96. assertEquals(" ", xml.getProperties().getCoreProperties().getTitle());
  97. assertEquals(" ", xml.getProperties().getCoreProperties().getUnderlyingProperties().getSubjectProperty().get());
  98. xml.close();
  99. }
  100. @Test
  101. public void testWorkbookProperties() throws Exception {
  102. XWPFDocument doc = new XWPFDocument();
  103. POIXMLProperties props = doc.getProperties();
  104. assertNotNull(props);
  105. assertEquals("Apache POI", props.getExtendedProperties().getUnderlyingProperties().getApplication());
  106. doc.close();
  107. }
  108. @Test
  109. public void testAddParagraph() throws IOException {
  110. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
  111. assertEquals(3, doc.getParagraphs().size());
  112. XWPFParagraph p = doc.createParagraph();
  113. assertEquals(p, doc.getParagraphs().get(3));
  114. assertEquals(4, doc.getParagraphs().size());
  115. assertEquals(3, doc.getParagraphPos(3));
  116. assertEquals(3, doc.getPosOfParagraph(p));
  117. CTP ctp = p.getCTP();
  118. XWPFParagraph newP = doc.getParagraph(ctp);
  119. assertSame(p, newP);
  120. XmlCursor cursor = doc.getDocument().getBody().getPArray(0).newCursor();
  121. XWPFParagraph cP = doc.insertNewParagraph(cursor);
  122. assertSame(cP, doc.getParagraphs().get(0));
  123. assertEquals(5, doc.getParagraphs().size());
  124. doc.close();
  125. }
  126. @Test
  127. public void testAddPicture() throws IOException, InvalidFormatException {
  128. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
  129. byte[] jpeg = XWPFTestDataSamples.getImage("nature1.jpg");
  130. String relationId = doc.addPictureData(jpeg, Document.PICTURE_TYPE_JPEG);
  131. byte[] newJpeg = ((XWPFPictureData) doc.getRelationById(relationId)).getData();
  132. assertEquals(newJpeg.length, jpeg.length);
  133. for (int i = 0; i < jpeg.length; i++) {
  134. assertEquals(newJpeg[i], jpeg[i]);
  135. }
  136. doc.close();
  137. }
  138. @Test
  139. public void testAllPictureFormats() throws IOException, InvalidFormatException {
  140. XWPFDocument doc = new XWPFDocument();
  141. doc.addPictureData(new byte[10], Document.PICTURE_TYPE_EMF);
  142. doc.addPictureData(new byte[11], Document.PICTURE_TYPE_WMF);
  143. doc.addPictureData(new byte[12], Document.PICTURE_TYPE_PICT);
  144. doc.addPictureData(new byte[13], Document.PICTURE_TYPE_JPEG);
  145. doc.addPictureData(new byte[14], Document.PICTURE_TYPE_PNG);
  146. doc.addPictureData(new byte[15], Document.PICTURE_TYPE_DIB);
  147. doc.addPictureData(new byte[16], Document.PICTURE_TYPE_GIF);
  148. doc.addPictureData(new byte[17], Document.PICTURE_TYPE_TIFF);
  149. doc.addPictureData(new byte[18], Document.PICTURE_TYPE_EPS);
  150. doc.addPictureData(new byte[19], Document.PICTURE_TYPE_BMP);
  151. doc.addPictureData(new byte[20], Document.PICTURE_TYPE_WPG);
  152. assertEquals(11, doc.getAllPictures().size());
  153. XWPFDocument doc2 = XWPFTestDataSamples.writeOutAndReadBack(doc);
  154. assertEquals(11, doc2.getAllPictures().size());
  155. doc2.close();
  156. doc.close();
  157. }
  158. @Test
  159. public void testAddHyperlink() throws IOException {
  160. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("SampleDoc.docx");
  161. XWPFParagraph p = doc.createParagraph();
  162. XWPFHyperlinkRun h = p.createHyperlinkRun("http://poi.apache.org/");
  163. h.setText("Apache POI");
  164. assertEquals("http://poi.apache.org/", h.getHyperlink(doc).getURL());
  165. assertEquals(p.getRuns().size(), 1);
  166. assertEquals(p.getRuns().get(0), h);
  167. }
  168. @Test
  169. public void testRemoveBodyElement() throws IOException {
  170. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("sample.docx");
  171. assertEquals(3, doc.getParagraphs().size());
  172. assertEquals(3, doc.getBodyElements().size());
  173. XWPFParagraph p1 = doc.getParagraphs().get(0);
  174. XWPFParagraph p2 = doc.getParagraphs().get(1);
  175. XWPFParagraph p3 = doc.getParagraphs().get(2);
  176. assertEquals(p1, doc.getBodyElements().get(0));
  177. assertEquals(p1, doc.getParagraphs().get(0));
  178. assertEquals(p2, doc.getBodyElements().get(1));
  179. assertEquals(p2, doc.getParagraphs().get(1));
  180. assertEquals(p3, doc.getBodyElements().get(2));
  181. assertEquals(p3, doc.getParagraphs().get(2));
  182. // Add another
  183. XWPFParagraph p4 = doc.createParagraph();
  184. assertEquals(4, doc.getParagraphs().size());
  185. assertEquals(4, doc.getBodyElements().size());
  186. assertEquals(p1, doc.getBodyElements().get(0));
  187. assertEquals(p1, doc.getParagraphs().get(0));
  188. assertEquals(p2, doc.getBodyElements().get(1));
  189. assertEquals(p2, doc.getParagraphs().get(1));
  190. assertEquals(p3, doc.getBodyElements().get(2));
  191. assertEquals(p3, doc.getParagraphs().get(2));
  192. assertEquals(p4, doc.getBodyElements().get(3));
  193. assertEquals(p4, doc.getParagraphs().get(3));
  194. // Remove the 2nd
  195. assertEquals(true, doc.removeBodyElement(1));
  196. assertEquals(3, doc.getParagraphs().size());
  197. assertEquals(3, doc.getBodyElements().size());
  198. assertEquals(p1, doc.getBodyElements().get(0));
  199. assertEquals(p1, doc.getParagraphs().get(0));
  200. assertEquals(p3, doc.getBodyElements().get(1));
  201. assertEquals(p3, doc.getParagraphs().get(1));
  202. assertEquals(p4, doc.getBodyElements().get(2));
  203. assertEquals(p4, doc.getParagraphs().get(2));
  204. // Remove the 1st
  205. assertEquals(true, doc.removeBodyElement(0));
  206. assertEquals(2, doc.getParagraphs().size());
  207. assertEquals(2, doc.getBodyElements().size());
  208. assertEquals(p3, doc.getBodyElements().get(0));
  209. assertEquals(p3, doc.getParagraphs().get(0));
  210. assertEquals(p4, doc.getBodyElements().get(1));
  211. assertEquals(p4, doc.getParagraphs().get(1));
  212. // Remove the last
  213. assertEquals(true, doc.removeBodyElement(1));
  214. assertEquals(1, doc.getParagraphs().size());
  215. assertEquals(1, doc.getBodyElements().size());
  216. assertEquals(p3, doc.getBodyElements().get(0));
  217. assertEquals(p3, doc.getParagraphs().get(0));
  218. doc.close();
  219. }
  220. @Test
  221. public void testRegisterPackagePictureData() throws IOException, InvalidFormatException {
  222. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_1.docx");
  223. /* manually assemble a new image package part*/
  224. OPCPackage opcPckg = doc.getPackage();
  225. XWPFRelation jpgRelation = XWPFRelation.IMAGE_JPEG;
  226. PackagePartName partName = PackagingURIHelper.createPartName(jpgRelation.getDefaultFileName().replace('#', '2'));
  227. PackagePart newImagePart = opcPckg.createPart(partName, jpgRelation.getContentType());
  228. byte[] nature1 = XWPFTestDataSamples.getImage("abstract4.jpg");
  229. OutputStream os = newImagePart.getOutputStream();
  230. os.write(nature1);
  231. os.close();
  232. XWPFHeader xwpfHeader = doc.getHeaderArray(0);
  233. XWPFPictureData newPicData = new XWPFPictureData(newImagePart);
  234. /* new part is now ready to rumble */
  235. assertFalse(xwpfHeader.getAllPictures().contains(newPicData));
  236. assertFalse(doc.getAllPictures().contains(newPicData));
  237. assertFalse(doc.getAllPackagePictures().contains(newPicData));
  238. doc.registerPackagePictureData(newPicData);
  239. assertFalse(xwpfHeader.getAllPictures().contains(newPicData));
  240. assertFalse(doc.getAllPictures().contains(newPicData));
  241. assertTrue(doc.getAllPackagePictures().contains(newPicData));
  242. doc.getPackage().revert();
  243. opcPckg.close();
  244. doc.close();
  245. }
  246. @Test
  247. public void testFindPackagePictureData() throws IOException {
  248. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_1.docx");
  249. byte[] nature1 = XWPFTestDataSamples.getImage("nature1.gif");
  250. XWPFPictureData part = doc.findPackagePictureData(nature1, Document.PICTURE_TYPE_GIF);
  251. assertNotNull(part);
  252. assertTrue(doc.getAllPictures().contains(part));
  253. assertTrue(doc.getAllPackagePictures().contains(part));
  254. doc.getPackage().revert();
  255. doc.close();
  256. }
  257. @Test
  258. public void testGetAllPictures() throws IOException {
  259. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_3.docx");
  260. List<XWPFPictureData> allPictures = doc.getAllPictures();
  261. List<XWPFPictureData> allPackagePictures = doc.getAllPackagePictures();
  262. assertNotNull(allPictures);
  263. assertEquals(3, allPictures.size());
  264. for (XWPFPictureData xwpfPictureData : allPictures) {
  265. assertTrue(allPackagePictures.contains(xwpfPictureData));
  266. }
  267. try {
  268. allPictures.add(allPictures.get(0));
  269. fail("This list must be unmodifiable!");
  270. } catch (UnsupportedOperationException e) {
  271. // all ok
  272. }
  273. doc.getPackage().revert();
  274. doc.close();
  275. }
  276. @Test
  277. public void testGetAllPackagePictures() throws IOException {
  278. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_3.docx");
  279. List<XWPFPictureData> allPackagePictures = doc.getAllPackagePictures();
  280. assertNotNull(allPackagePictures);
  281. assertEquals(5, allPackagePictures.size());
  282. try {
  283. allPackagePictures.add(allPackagePictures.get(0));
  284. fail("This list must be unmodifiable!");
  285. } catch (UnsupportedOperationException e) {
  286. // all ok
  287. }
  288. doc.getPackage().revert();
  289. doc.close();
  290. }
  291. @Test
  292. public void testPictureHandlingSimpleFile() throws IOException, InvalidFormatException {
  293. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_1.docx");
  294. assertEquals(1, doc.getAllPackagePictures().size());
  295. byte[] newPic = XWPFTestDataSamples.getImage("abstract4.jpg");
  296. String id1 = doc.addPictureData(newPic, Document.PICTURE_TYPE_JPEG);
  297. assertEquals(2, doc.getAllPackagePictures().size());
  298. /* copy data, to avoid instance-equality */
  299. byte[] newPicCopy = Arrays.copyOf(newPic, newPic.length);
  300. String id2 = doc.addPictureData(newPicCopy, Document.PICTURE_TYPE_JPEG);
  301. assertEquals(id1, id2);
  302. doc.getPackage().revert();
  303. doc.close();
  304. }
  305. @Test
  306. public void testPictureHandlingHeaderDocumentImages() throws IOException {
  307. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_2.docx");
  308. assertEquals(1, doc.getAllPictures().size());
  309. assertEquals(1, doc.getAllPackagePictures().size());
  310. assertEquals(1, doc.getHeaderArray(0).getAllPictures().size());
  311. doc.getPackage().revert();
  312. doc.close();
  313. }
  314. @Test
  315. public void testPictureHandlingComplex() throws IOException, InvalidFormatException {
  316. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("issue_51265_3.docx");
  317. XWPFHeader xwpfHeader = doc.getHeaderArray(0);
  318. assertEquals(3, doc.getAllPictures().size());
  319. assertEquals(3, xwpfHeader.getAllPictures().size());
  320. assertEquals(5, doc.getAllPackagePictures().size());
  321. byte[] nature1 = XWPFTestDataSamples.getImage("nature1.jpg");
  322. String id = doc.addPictureData(nature1, Document.PICTURE_TYPE_JPEG);
  323. POIXMLDocumentPart part1 = xwpfHeader.getRelationById("rId1");
  324. XWPFPictureData part2 = (XWPFPictureData) doc.getRelationById(id);
  325. assertSame(part1, part2);
  326. doc.getPackage().revert();
  327. doc.close();
  328. }
  329. @Test
  330. public void testZeroLengthLibreOfficeDocumentWithWaterMarkHeader() throws IOException {
  331. XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("zero-length.docx");
  332. POIXMLProperties properties = doc.getProperties();
  333. assertNotNull(properties.getCoreProperties());
  334. XWPFHeader headerArray = doc.getHeaderArray(0);
  335. assertEquals(1, headerArray.getAllPictures().size());
  336. assertEquals("image1.png", headerArray.pictures.get(0).getFileName());
  337. assertEquals("", headerArray.getText());
  338. POIXMLProperties.ExtendedProperties extendedProperties = properties.getExtendedProperties();
  339. assertNotNull(extendedProperties);
  340. assertEquals(0, extendedProperties.getUnderlyingProperties().getCharacters());
  341. doc.close();
  342. }
  343. @Test
  344. public void testSettings() throws IOException {
  345. XWPFSettings settings = new XWPFSettings();
  346. assertEquals(100, settings.getZoomPercent());
  347. settings.setZoomPercent(50);
  348. assertEquals(50, settings.getZoomPercent());
  349. assertEquals(false, settings.getEvenAndOddHeadings());
  350. settings.setEvenAndOddHeadings(true);
  351. assertEquals(true, settings.getEvenAndOddHeadings());
  352. assertEquals(false, settings.getMirrorMargins());
  353. settings.setMirrorMargins(true);
  354. assertEquals(true, settings.getMirrorMargins());
  355. XWPFDocument doc = new XWPFDocument();
  356. assertEquals(100, doc.getZoomPercent());
  357. doc.setZoomPercent(50);
  358. assertEquals(50, doc.getZoomPercent());
  359. doc.setZoomPercent(200);
  360. assertEquals(200, doc.getZoomPercent());
  361. assertEquals(false, doc.getEvenAndOddHeadings());
  362. doc.setEvenAndOddHeadings(true);
  363. assertEquals(true, doc.getEvenAndOddHeadings());
  364. assertEquals(false, doc.getMirrorMargins());
  365. doc.setMirrorMargins(true);
  366. assertEquals(true, doc.getMirrorMargins());
  367. XWPFDocument back = XWPFTestDataSamples.writeOutAndReadBack(doc);
  368. assertEquals(200, back.getZoomPercent());
  369. back.close();
  370. // OutputStream out = new FileOutputStream("/tmp/testZoom.docx");
  371. // doc.write(out);
  372. // out.close();
  373. doc.close();
  374. }
  375. @Test
  376. public void testEnforcedWith() throws IOException {
  377. XWPFDocument docx = XWPFTestDataSamples.openSampleDocument("EnforcedWith.docx");
  378. assertTrue(docx.isEnforcedProtection());
  379. docx.close();
  380. }
  381. @Test
  382. @Ignore("XWPF should be able to write to a new Stream when opened Read-Only")
  383. public void testWriteFromReadOnlyOPC() throws Exception {
  384. OPCPackage opc = OPCPackage.open(
  385. POIDataSamples.getDocumentInstance().getFile("SampleDoc.docx"),
  386. PackageAccess.READ
  387. );
  388. XWPFDocument doc = new XWPFDocument(opc);
  389. XWPFWordExtractor ext = new XWPFWordExtractor(doc);
  390. String origText = ext.getText();
  391. doc = XWPFTestDataSamples.writeOutAndReadBack(doc);
  392. ext.close();
  393. ext = new XWPFWordExtractor(doc);
  394. assertEquals(origText, ext.getText());
  395. ext.close();
  396. }
  397. }