==================================================================== */
package org.apache.poi.hwpf.usermodel;
+import static org.apache.poi.POIDataSamples.getDocumentInstance;
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertNotContained;
+import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import java.util.List;
import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.XMLHelper;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.ValueSource;
import org.w3c.dom.Document;
/**
* Test different problems reported in the Apache Bugzilla
- * against HWPF
+ * against HWPF
*/
-public class TestBugs{
+class TestBugs {
private static final POILogger logger = POILogFactory.getLogger(TestBugs.class);
/**
 * Asserts that two strings are equal once line endings are unified.
 * Both arguments have every CR-LF pair and every remaining lone CR replaced
 * by a single LF and are then trimmed before being compared.
 *
 * @param expected the expected text
 * @param actual   the text produced by the code under test
 */
private static void assertEqualsIgnoreNewline(String expected, String actual) {
- String newExpected = expected.replaceAll("\r\n", "\n" )
- .replaceAll("\r", "\n").trim();
- String newActual = actual.replaceAll("\r\n", "\n" )
- .replaceAll("\r", "\n").trim();
+ String newExpected = expected.replaceAll("\r\n", "\n")
+ .replaceAll("\r", "\n").trim();
+ String newActual = actual.replaceAll("\r\n", "\n")
+ .replaceAll("\r", "\n").trim();
assertEquals(newExpected, newActual);
}
/**
 * Compares two ranges paragraph by paragraph: the paragraph count, each
 * paragraph's text (newline style ignored), and its isInTable and
 * isTableRowEnd flags. For paragraphs that are in a table in both ranges,
 * the row count and paragraph count of the owning tables are compared too.
 *
 * @param expected the reference range
 * @param actual   the range to check against the reference
 */
- private static void assertTableStructures(Range expected, Range actual ) {
+ private static void assertTableStructures(Range expected, Range actual) {
assertEquals(expected.numParagraphs(), actual.numParagraphs());
- for (int p = 0; p < expected.numParagraphs(); p++ )
- {
+ for (int p = 0; p < expected.numParagraphs(); p++) {
Paragraph expParagraph = expected.getParagraph(p);
Paragraph actParagraph = actual.getParagraph(p);
assertEqualsIgnoreNewline(expParagraph.text(), actParagraph.text());
- assertEquals( expParagraph.isInTable(), actParagraph.isInTable(), "Different isInTable flags for paragraphs #" + p
- + " -- " + expParagraph + " -- " + actParagraph + "." );
+ assertEquals(expParagraph.isInTable(), actParagraph.isInTable(), "Different isInTable flags for paragraphs #" + p
+ + " -- " + expParagraph + " -- " + actParagraph + ".");
assertEquals(expParagraph.isTableRowEnd(),
- actParagraph.isTableRowEnd());
+ actParagraph.isTableRowEnd());
- if (expParagraph.isInTable() && actParagraph.isInTable() )
- {
+ if (expParagraph.isInTable() && actParagraph.isInTable()) {
Table expTable, actTable;
- try
- {
+ try {
expTable = expected.getTable(expParagraph);
actTable = actual.getTable(actParagraph);
- }
- catch (Exception exc )
- {
+ } catch (Exception exc) {
// NOTE(review): any exception from getTable() silently skips the table checks
// for this paragraph; presumably getTable() rejects paragraphs that do not
// start a table - confirm, since a failure on only one side is hidden as well.
continue;
}
assertEquals(expTable.numRows(), actTable.numRows());
assertEquals(expTable.numParagraphs(),
- actTable.numParagraphs());
+ actTable.numParagraphs());
}
}
}
private String getText(String sampleFile) throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile(sampleFile);
+ HWPFDocument doc = openSampleFile(sampleFile);
WordExtractor extractor = new WordExtractor(doc);
try {
return extractor.getText();
}
}
- /**
- * Bug 33519 - HWPF fails to read a file
- */
- @Test
- void test33519() throws IOException {
- assertNotNull(getText("Bug33519.doc"));
- }
-
/**
* Bug 34898 - WordExtractor doesn't read the whole string from the file
*/
*/
@Test
void test41898() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug41898.doc");
+ HWPFDocument doc = openSampleFile("Bug41898.doc");
List<Picture> pics = doc.getPicturesTable().getAllPictures();
assertNotNull(pics);
* --sergey
*/
final Collection<OfficeDrawing> officeDrawings = doc
- .getOfficeDrawingsMain().getOfficeDrawings();
+ .getOfficeDrawingsMain().getOfficeDrawings();
assertNotNull(officeDrawings);
assertEquals(1, officeDrawings.size());
@SuppressWarnings("deprecation")
@Test
void test44431() throws IOException {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug44431.doc");
+ HWPFDocument doc1 = openSampleFile("Bug44431.doc");
WordExtractor extractor1 = new WordExtractor(doc1);
try {
assertEqualsIgnoreNewline(extractor1.getFooterText(), extractor2.getFooterText());
assertEqualsIgnoreNewline(extractor1.getHeaderText(), extractor2.getHeaderText());
assertEqualsIgnoreNewline(Arrays.toString(extractor1.getParagraphText()),
- Arrays.toString(extractor2.getParagraphText()));
+ Arrays.toString(extractor2.getParagraphText()));
assertEqualsIgnoreNewline(extractor1.getText(), extractor2.getText());
}
// Checks the complete text returned by getText("Bug44431.doc") against the
// literal below; the comparison ignores newline style and surrounding whitespace.
@Test
void test44431_2() throws IOException {
assertEqualsIgnoreNewline("File name=FieldsTest.doc\n" +
- "\n" +
- "\n" +
- "STYLEREF test\n" +
- "\n" +
- "\n" +
- "\n" +
- "TEST TABLE OF CONTENTS\n" +
- "\n" +
- "Heading paragraph in next page\t2\n" +
- "Another heading paragraph in further page\t3\n" +
- "Another heading paragraph in further page\t3\n" +
- "\n" +
- "\n" +
- "Heading paragraph in next page\n" +
- "Another heading paragraph in further page\n" +
- "\n" +
- "\n" +
- "\n" +
- "Page 3 of 3", getText("Bug44431.doc"));
+ "\n" +
+ "\n" +
+ "STYLEREF test\n" +
+ "\n" +
+ "\n" +
+ "\n" +
+ "TEST TABLE OF CONTENTS\n" +
+ "\n" +
+ "Heading paragraph in next page\t2\n" +
+ "Another heading paragraph in further page\t3\n" +
+ "Another heading paragraph in further page\t3\n" +
+ "\n" +
+ "\n" +
+ "Heading paragraph in next page\n" +
+ "Another heading paragraph in further page\n" +
+ "\n" +
+ "\n" +
+ "\n" +
+ "Page 3 of 3", getText("Bug44431.doc"));
}
/**
@Test
void test45473() throws IOException {
// Fetch the current text
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
+ HWPFDocument doc1 = openSampleFile("Bug45473.doc");
WordExtractor wordExtractor = new WordExtractor(doc1);
final String text1;
try {
}
// Re-load, then re-save and re-check
- doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
+ doc1 = openSampleFile("Bug45473.doc");
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
WordExtractor wordExtractor2 = new WordExtractor(doc2);
final String text2;
// the text in the saved document has some differences in line
// separators but we tolerate that
- assertEqualsIgnoreNewline(text1.replaceAll("\n", "" ), text2.replaceAll("\n", ""));
+ assertEqualsIgnoreNewline(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
}
/**
*/
@Test
void test46220() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
+ HWPFDocument doc = openSampleFile("Bug46220.doc");
// reference checksums as in Bugzilla
- String[] md5 = { "851be142bce6d01848e730cb6903f39e",
- "7fc6d8fb58b09ababd036d10a0e8c039",
- "a7dc644c40bc2fbf17b2b62d07f99248",
- "72d07b8db5fad7099d90bc4c304b4666" };
+ String[] md5 = {"851be142bce6d01848e730cb6903f39e",
+ "7fc6d8fb58b09ababd036d10a0e8c039",
+ "a7dc644c40bc2fbf17b2b62d07f99248",
+ "72d07b8db5fad7099d90bc4c304b4666"};
List<Picture> pics = doc.getPicturesTable().getAllPictures();
assertEquals(4, pics.size());
- for (int i = 0; i < pics.size(); i++ )
- {
+ for (int i = 0; i < pics.size(); i++) {
Picture pic = pics.get(i);
byte[] data = pic.getRawContent();
// use Apache Commons Codec utils to compute md5
@Test
void test47286() throws IOException {
// Fetch the current text
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug47286.doc");
+ HWPFDocument doc1 = openSampleFile("Bug47286.doc");
WordExtractor wordExtractor = new WordExtractor(doc1);
final String text1;
try {
}
// Re-load, then re-save and re-check
- doc1 = HWPFTestDataSamples.openSampleFile("Bug47286.doc");
+ doc1 = openSampleFile("Bug47286.doc");
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
WordExtractor wordExtractor2 = new WordExtractor(doc2);
final String text2;
// the text in the saved document has some differences in line
// separators but we tolerate that
- assertEqualsIgnoreNewline(text1.replaceAll("\n", "" ), text2.replaceAll("\n", ""));
+ assertEqualsIgnoreNewline(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
assertEquals(doc1.getCharacterTable().getTextRuns().size(), doc2
- .getCharacterTable().getTextRuns().size());
+ .getCharacterTable().getTextRuns().size());
assertTableStructures(doc1.getRange(), doc2.getRange());
}
*/
@Test
void test47287() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47287.doc");
- String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
- "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
+ HWPFDocument doc = openSampleFile("Bug47287.doc");
+ String[] values = {"1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
+ "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15",};
int usedVal = 0;
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
Range r = doc.getRange();
- for (int x = 0; x < r.numSections(); x++ )
- {
+ for (int x = 0; x < r.numSections(); x++) {
Section s = r.getSection(x);
- for (int y = 0; y < s.numParagraphs(); y++ )
- {
+ for (int y = 0; y < s.numParagraphs(); y++) {
Paragraph p = s.getParagraph(y);
- for (int z = 0; z < p.numCharacterRuns(); z++ )
- {
+ for (int z = 0; z < p.numCharacterRuns(); z++) {
boolean isFound = false;
// character run
String text = run.text();
String oldText = text;
int c = text.indexOf("FORMTEXT ");
- if (c < 0 )
- {
+ if (c < 0) {
int k = text.indexOf(PLACEHOLDER);
- if (k >= 0 )
- {
- text = text.substring(0, k ) + values[usedVal]
- + text.substring(k + PLACEHOLDER.length());
+ if (k >= 0) {
+ text = text.substring(0, k) + values[usedVal]
+ + text.substring(k + PLACEHOLDER.length());
usedVal++;
isFound = true;
}
- }
- else
- {
+ } else {
for (; c >= 0; c = text.indexOf("FORMTEXT ", c
- + "FORMTEXT ".length() ) )
- {
+ + "FORMTEXT ".length())) {
int k = text.indexOf(PLACEHOLDER, c);
- if (k >= 0 )
- {
- text = text.substring(0, k )
- + values[usedVal]
- + text.substring(k
- + PLACEHOLDER.length());
+ if (k >= 0) {
+ text = text.substring(0, k)
+ + values[usedVal]
+ + text.substring(k
+ + PLACEHOLDER.length());
usedVal++;
isFound = true;
}
}
}
- if (isFound )
- {
+ if (isFound) {
run.replaceText(oldText, text, 0);
}
// (2) read text from text document (retrieved by saving the word
// document as text file using encoding UTF-8)
- try (InputStream is = POIDataSamples.getDocumentInstance()
- .openResourceAsStream("Bug47742-text.txt")) {
+ try (InputStream is = getDocumentInstance()
+ .openResourceAsStream("Bug47742-text.txt")) {
byte[] expectedBytes = IOUtils.toByteArray(is);
String expectedText = new String(expectedBytes, StandardCharsets.UTF_8)
- .substring(1); // strip-off the unicode marker
+ .substring(1); // strip-off the unicode marker
assertEqualsIgnoreNewline(expectedText, foundText);
}
*/
@Test
void test47958() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
+ HWPFDocument doc = openSampleFile("Bug47958.doc");
// Nothing is asserted about the result: the test passes as long as
// getAllPictures() completes without throwing.
doc.getPicturesTable().getAllPictures();
}
* formatting)
*/
@Test
- void test48065() {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug48065.doc");
- HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
+ void test48065() throws IOException {
+ try (HWPFDocument doc1 = openSampleFile("Bug48065.doc");
+ HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1)) {
// doc2 is doc1 after a write-out/read-back round trip; both are closed by the
// try-with-resources statement.
- Range expected = doc1.getRange();
- Range actual = doc2.getRange();
+ Range expected = doc1.getRange();
+ Range actual = doc2.getRange();
// The texts are compared after mapping every CR to LF and collapsing each
// pair of consecutive LFs into one.
- assertEqualsIgnoreNewline(
- expected.text().replace("\r", "\n").replaceAll("\n\n", "\n" ),
+ assertEqualsIgnoreNewline(
+ expected.text().replace("\r", "\n").replaceAll("\n\n", "\n"),
actual.text().replace("\r", "\n").replaceAll("\n\n", "\n"));
- assertTableStructures(expected, actual);
+ assertTableStructures(expected, actual);
+ }
}
// Opens the old-format sample Bug49933.doc, extracts its text with
// Word6Extractor and checks that the text contains "best.wine.jump.ru".
@Test
void test49933() throws IOException {
try (HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug49933.doc");
- Word6Extractor extractor = new Word6Extractor(doc)) {
+ Word6Extractor extractor = new Word6Extractor(doc)) {
assertContains(extractor.getText(), "best.wine.jump.ru");
}
}
/**
* Bug 50936 - Exception parsing MS Word 8.0 file
*/
- @Test
- void test50936() throws Exception {
- String[] filenames = {"Bug50936_1.doc", "Bug50936_2.doc", "Bug50936_3.doc"};
- for (String filename : filenames) {
- HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFile(filename);
-
- assertNotNull( hwpfDocument.getPicturesTable().getAllPictures(), filename );
-
- hwpfDocument.close();
// Runs once per file name listed in @ValueSource. The document is opened in
// try-with-resources, so it is closed even when the assertion fails.
+ @ParameterizedTest
+ @ValueSource(strings = {"Bug50936_1.doc", "Bug50936_2.doc", "Bug50936_3.doc"})
+ void test50936(String filename) throws Exception {
+ try (HWPFDocument hwpfDocument = openSampleFile(filename)) {
+ assertNotNull(hwpfDocument.getPicturesTable().getAllPictures());
}
}
* release from download site )
*/
@Test
- void test51604() {
- HWPFDocument document = HWPFTestDataSamples
- .openSampleFile("Bug51604.doc");
+ void test51604() throws IOException {
+ try (HWPFDocument document = openSampleFile("Bug51604.doc")) {
// Replace the text of every character run with "+" followed by a running
// counter, visiting paragraphs and runs in index order.
- Range range = document.getRange();
- int numParagraph = range.numParagraphs();
- int counter = 0;
- for (int i = 0; i < numParagraph; i++ )
- {
- Paragraph paragraph = range.getParagraph(i);
- int numCharRuns = paragraph.numCharacterRuns();
- for (int j = 0; j < numCharRuns; j++ )
- {
- CharacterRun charRun = paragraph.getCharacterRun(j);
- String text = charRun.text();
- charRun.replaceText(text, "+" + (++counter));
+ Range range = document.getRange();
+ int numParagraph = range.numParagraphs();
+ int counter = 0;
+ for (int i = 0; i < numParagraph; i++) {
+ Paragraph paragraph = range.getParagraph(i);
+ int numCharRuns = paragraph.numCharacterRuns();
+ for (int j = 0; j < numCharRuns; j++) {
+ CharacterRun charRun = paragraph.getCharacterRun(j);
+ String text = charRun.text();
+ charRun.replaceText(text, "+" + (++counter));
+ }
}
- }
- document = HWPFTestDataSamples.writeOutAndReadBack(document);
- String text = document.getDocumentText();
- assertEqualsIgnoreNewline("+1+2+3+4+5+6+7+8+9+10+11+12", text);
// After a write-out/read-back round trip the document text must consist of
// exactly the twelve replacement strings.
+ try (HWPFDocument document2 = HWPFTestDataSamples.writeOutAndReadBack(document)) {
+ String text = document2.getDocumentText();
+ assertEqualsIgnoreNewline("+1+2+3+4+5+6+7+8+9+10+11+12", text);
+ }
+ }
}
/**
*/
@Test
void test51604p2() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51604.doc");
+ HWPFDocument doc = openSampleFile("Bug51604.doc");
Range range = doc.getRange();
int numParagraph = range.numParagraphs();
replaceText(range, numParagraph);
doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
- final FileInformationBlock fileInformationBlock = doc
- .getFileInformationBlock();
+ final FileInformationBlock fileInformationBlock = doc.getFileInformationBlock();
int totalLength = 0;
- for (SubdocumentType type : SubdocumentType.values() )
- {
- final int partLength = fileInformationBlock
- .getSubdocumentTextStreamLength(type);
+ for (SubdocumentType type : SubdocumentType.values()) {
+ final int partLength = fileInformationBlock.getSubdocumentTextStreamLength(type);
assert (partLength >= 0);
totalLength += partLength;
}
private void replaceText(Range range, int numParagraph) {
- for (int i = 0; i < numParagraph; i++ )
- {
+ for (int i = 0; i < numParagraph; i++) {
Paragraph paragraph = range.getParagraph(i);
int numCharRuns = paragraph.numCharacterRuns();
- for (int j = 0; j < numCharRuns; j++ )
- {
+ for (int j = 0; j < numCharRuns; j++) {
CharacterRun charRun = paragraph.getCharacterRun(j);
String text = charRun.text();
- if (text.contains("Header" ) ) {
+ if (text.contains("Header")) {
charRun.replaceText(text, "added");
}
}
*/
@Test
void test51604p3() throws Exception {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51604.doc");
+ try (HWPFDocument doc = openSampleFile("Bug51604.doc")) {
// Bytes of the table stream in the range [fcDop, fcDop + lcbDop) as stored in the file.
- FileInformationBlock fib = doc.getFileInformationBlock();
- byte[] originalData = Arrays.copyOfRange(doc.getTableStream(), fib.getFcDop(), fib.getFcDop()+fib.getLcbDop());
+ FileInformationBlock fib = doc.getFileInformationBlock();
+ byte[] originalData = Arrays.copyOfRange(doc.getTableStream(), fib.getFcDop(), fib.getFcDop() + fib.getLcbDop());
// Re-serialising the parsed document properties must reproduce those bytes.
- ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
- doc.getDocProperties().writeTo(outputStream);
- final byte[] oldData = outputStream.toByteArray();
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ doc.getDocProperties().writeTo(outputStream);
+ final byte[] oldData = outputStream.toByteArray();
- assertEqualsIgnoreNewline(Arrays.toString(originalData ),
- Arrays.toString(oldData));
+ assertEqualsIgnoreNewline(Arrays.toString(originalData), Arrays.toString(oldData));
- Range range = doc.getRange();
- int numParagraph = range.numParagraphs();
- replaceText(range, numParagraph);
-
- doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
+ Range range = doc.getRange();
+ int numParagraph = range.numParagraphs();
+ replaceText(range, numParagraph);
// The serialised document properties must be unchanged after the text
// replacement and a write-out/read-back round trip.
- outputStream = new ByteArrayOutputStream();
- doc.getDocProperties().writeTo(outputStream);
- final byte[] newData = outputStream.toByteArray();
+ try (HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc)) {
+ outputStream = new ByteArrayOutputStream();
+ doc2.getDocProperties().writeTo(outputStream);
+ final byte[] newData = outputStream.toByteArray();
- assertEqualsIgnoreNewline(Arrays.toString(oldData ), Arrays.toString(newData));
+ assertEqualsIgnoreNewline(Arrays.toString(oldData), Arrays.toString(newData));
+ }
+ }
}
/**
*/
@Test
void test51671() throws Exception {
- InputStream is = POIDataSamples.getDocumentInstance()
- .openResourceAsStream("empty.doc");
+ InputStream is = getDocumentInstance()
+ .openResourceAsStream("empty.doc");
try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem(is)) {
HWPFDocument hwpfDocument = new HWPFDocument(
- poifsFileSystem.getRoot());
+ poifsFileSystem.getRoot());
hwpfDocument.write(new ByteArrayOutputStream());
hwpfDocument.close();
}
// YK: the test will run only if the poi.test.remote system property is
// set.
// TODO: refactor into something nicer!
- if (System.getProperty("poi.test.remote" ) != null )
- {
+ if (System.getProperty("poi.test.remote") != null) {
String href = "http://domex.nps.edu/corp/files/govdocs1/007/007488.doc";
HWPFDocument hwpfDocument = HWPFTestDataSamples
- .openRemoteFile(href);
+ .openRemoteFile(href);
try (WordExtractor wordExtractor = new WordExtractor(hwpfDocument)) {
wordExtractor.getText();
* ArrayIndexOutOfBoundsException
*/
@Test
- void testBug51890() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51890.doc");
- for (Picture picture : doc.getPicturesTable().getAllPictures() )
- {
- PictureType pictureType = picture.suggestPictureType();
- logger.log(POILogger.DEBUG,
- "Picture at offset ", picture.getStartOffset(),
- " has type ", pictureType);
+ void testBug51890() throws IOException {
+ // Expected {start offset, suggested picture type} pairs, in the order in
+ // which getAllPictures() is expected to return the pictures.
+ Object[][] exp = {
+ {0, PictureType.UNKNOWN},
+ {500, PictureType.PNG},
+ {2020, PictureType.UNKNOWN},
+ {2354, PictureType.UNKNOWN},
+ {3281, PictureType.UNKNOWN},
+ {4235, PictureType.UNKNOWN},
+ {5126, PictureType.BMP},
+ {6923, PictureType.BMP},
+ };
+
+ try (HWPFDocument doc = openSampleFile("Bug51890.doc")) {
+ List<Picture> pictures = doc.getPicturesTable().getAllPictures();
+ // Pin the picture count first: without it a document yielding fewer pictures
+ // would pass silently, and one yielding more would fail with an
+ // ArrayIndexOutOfBoundsException on exp[idx] instead of a clear assertion.
+ assertEquals(exp.length, pictures.size());
+ int idx = 0;
+ for (Picture picture : pictures) {
+ assertEquals(exp[idx][0], picture.getStartOffset());
+ assertEquals(exp[idx][1], picture.suggestPictureType());
+ idx++;
+ }
}
}
- /**
- * [RESOLVED FIXED] Bug 51834 - Opening and Writing .doc file results in
- * corrupt document
- */
- @Test
- void testBug51834() {
- /*
- * we don't have Java test for this file - it should be checked using
- * Microsoft BFF Validator. But check read-write-read anyway. -- sergey
- */
- HWPFTestDataSamples.openSampleFile("Bug51834.doc");
- HWPFTestDataSamples.writeOutAndReadBack(HWPFTestDataSamples
- .openSampleFile("Bug51834.doc"));
- }
-
/**
* Bug 51944 - PAPFormattedDiskPage.getPAPX - IndexOutOfBounds
*/
assertNotNull(WordToTextConverter.getText(doc));
}
- /**
- * Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutOfBoundsException
- * with no stack trace (broken after revision 1178063)
- */
- @Test
- void testBug52032_1() throws Exception {
- assertNotNull(getText("Bug52032_1.doc"));
- }
-
- /**
- * Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutOfBoundsException
- * with no stack trace (broken after revision 1178063)
- */
- @Test
- void testBug52032_2() throws Exception {
- assertNotNull(getText("Bug52032_2.doc"));
- }
-
- /**
- * Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutOfBoundsException
- * with no stack trace (broken after revision 1178063)
- */
- @Test
- void testBug52032_3() throws Exception {
- assertNotNull(getText("Bug52032_3.doc"));
- }
-
- /**
- * Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
- */
- @Test
- void testBug53380_1() throws Exception {
- assertNotNull(getText("Bug53380_1.doc"));
- }
-
- /**
- * Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
- */
- @Test
- void testBug53380_2() throws Exception
- {
- assertNotNull(getText("Bug53380_2.doc"));
/**
 * Opens each sample listed in the CSV source and converts it to text.
 * When the second CSV column is true, the document is also written out,
 * read back and converted to text again.
 *
 * @param file       name of the sample document to open
 * @param doReadBack whether to repeat the conversion after a write-out/read-back round trip
 */
+ @ParameterizedTest
+ @CsvSource({
+ // Bug 51834 - Opening and Writing .doc file results in corrupt document
+ // we don't have Java test for this file - it should be checked using
+ // Microsoft BFF Validator. But check read-write-read anyway. -- sergey
+ "Bug51834.doc, true",
+ // Bug 52032 - ArrayIndexOutOfBoundsException with no stack trace (broken after revision 1178063)
+ "Bug52032_1.doc, true",
+ "Bug52032_2.doc, true",
+ "Bug52032_3.doc, true",
+ // Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
+ "Bug53380_1.doc, true",
+ "Bug53380_2.doc, true",
+ "Bug53380_3.doc, true",
+ "Bug53380_4.doc, true",
+ // Bug 61268 - NegativeArraySizeException parsing word 97 document
+ "Bug61268.doc, false",
+ // Regression tests - testRegressionIn315beta2
+ "cap.stanford.edu_profiles_viewbiosketch_facultyid=4009&name=m_maciver.doc, true",
+ "ca.kwsymphony.www_education_School_Concert_Seat_Booking_Form_2011-12.doc, true",
+ // Bug 33519 - HWPF fails to read a file
+ "Bug33519.doc, false"
+ })
+ void testBug51834(String file, boolean doReadBack) throws Exception {
+ try (HWPFDocument doc = openSampleFile(file)) {
+ assertNotNull(WordToTextConverter.getText(doc));
+ if (doReadBack) {
+ try (HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc)) {
+ assertNotNull(WordToTextConverter.getText(doc2));
+ }
+ }
+ }
}
- /**
- * Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
- */
- @Test
- void testBug53380_3() throws Exception {
- assertNotNull(getText("Bug53380_3.doc"));
- }
-
- /**
- * Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
- */
- @Test
- void testBug53380_4() throws Exception {
- assertNotNull(getText("Bug53380_4.doc"));
- }
/**
* java.lang.UnsupportedOperationException: Non-extended character
- * Pascal strings are not supported right now
- *
+ * Pascal strings are not supported right now
+ * <p>
* Disabled pending a fix for the bug
*/
// NOTE(review): the Javadoc above says this test is disabled pending a fix, but
// no @Disabled annotation is visible here - confirm the comment is still accurate.
@Test
- void test56880() {
- HWPFDocument doc =
- HWPFTestDataSamples.openSampleFile("56880.doc");
- assertEqualsIgnoreNewline("Check Request", doc.getRange().text());
- }
-
- /**
- * Bug 61268 - NegativeArraySizeException parsing word 97 document
- */
- @Test
- void testBug61268() throws Exception {
- assertNotNull(getText("Bug61268.doc"));
+ void test56880() throws IOException {
+ try (HWPFDocument doc = openSampleFile("56880.doc")) {
+ assertEqualsIgnoreNewline("Check Request", doc.getRange().text());
+ }
}
private int section2LeftMargin = 1440;
@Test
@SuppressWarnings("SuspiciousNameCombination")
void testHWPFSections() {
- HWPFDocument document = HWPFTestDataSamples.openSampleFile("Bug53453Section.doc");
+ HWPFDocument document = openSampleFile("Bug53453Section.doc");
Range overallRange = document.getOverallRange();
int numParas = overallRange.numParagraphs();
- for(int i = 0; i < numParas; i++) {
+ for (int i = 0; i < numParas; i++) {
Paragraph para = overallRange.getParagraph(i);
int numSections = para.numSections();
- for(int j = 0; j < numSections; j++) {
+ for (int j = 0; j < numSections; j++) {
Section section = para.getSection(j);
- if(para.text().trim().equals("Section1")) {
+ if (para.text().trim().equals("Section1")) {
assertSection1Margin(section);
- }
- else if(para.text().trim().equals("Section2")) {
+ } else if (para.text().trim().equals("Section2")) {
assertSection2Margin(section);
// Change the margin widths
- this.section2BottomMargin = (int)(1.5 * AbstractWordUtils.TWIPS_PER_INCH);
- this.section2TopMargin = (int)(1.75 * AbstractWordUtils.TWIPS_PER_INCH);
- this.section2LeftMargin = (int)(0.5 * AbstractWordUtils.TWIPS_PER_INCH);
- this.section2RightMargin = (int)(0.75 * AbstractWordUtils.TWIPS_PER_INCH);
+ this.section2BottomMargin = (int) (1.5 * AbstractWordUtils.TWIPS_PER_INCH);
+ this.section2TopMargin = (int) (1.75 * AbstractWordUtils.TWIPS_PER_INCH);
+ this.section2LeftMargin = (int) (0.5 * AbstractWordUtils.TWIPS_PER_INCH);
+ this.section2RightMargin = (int) (0.75 * AbstractWordUtils.TWIPS_PER_INCH);
section.setMarginBottom(this.section2BottomMargin);
section.setMarginLeft(this.section2LeftMargin);
section.setMarginRight(this.section2RightMargin);
document = HWPFTestDataSamples.writeOutAndReadBack(document);
overallRange = document.getOverallRange();
numParas = overallRange.numParagraphs();
- for(int i = 0; i < numParas; i++) {
+ for (int i = 0; i < numParas; i++) {
Paragraph para = overallRange.getParagraph(i);
int numSections = para.numSections();
- for(int j = 0; j < numSections; j++) {
+ for (int j = 0; j < numSections; j++) {
Section section = para.getSection(j);
- if(para.text().trim().equals("Section1")) {
+ if (para.text().trim().equals("Section1")) {
// No changes to the margins in Section1
assertSection1Margin(section);
- }
- else if(para.text().trim().equals("Section2")) {
+ } else if (para.text().trim().equals("Section2")) {
// The margins in Section2 have kept the new settings.
assertSection2Margin(section);
}
assertEquals(section2NumColumns, section.getNumColumns());
}
- @Test
- void testRegressionIn315beta2() {
- HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFile("cap.stanford.edu_profiles_viewbiosketch_facultyid=4009&name=m_maciver.doc");
- assertNotNull(hwpfDocument);
- }
-
// Pins the behaviour that writing out and reading back 57603-seven_columns.doc
// throws ArrayIndexOutOfBoundsException.
@Test
void test57603SevenRowTable() throws Exception {
- try (HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFile("57603-seven_columns.doc")) {
+ try (HWPFDocument hwpfDocument = openSampleFile("57603-seven_columns.doc")) {
assertThrows(ArrayIndexOutOfBoundsException.class, () -> HWPFTestDataSamples.writeOutAndReadBack(hwpfDocument));
}
}
// Passes when an HWPFOldDocument can be constructed from 57843.doc without
// throwing; the assertNotNull on a freshly constructed object cannot fail.
@Test
void test57843() throws IOException {
- File f = POIDataSamples.getDocumentInstance().getFile("57843.doc");
- try (POIFSFileSystem fs = new POIFSFileSystem(f, true)) {
- HWPFOldDocument doc = new HWPFOldDocument(fs);
+ File f = getDocumentInstance().getFile("57843.doc");
+ try (POIFSFileSystem fs = new POIFSFileSystem(f, true);
+ HWPFOldDocument doc = new HWPFOldDocument(fs)) {
assertNotNull(doc);
- doc.close();
}
}
- @Test
- void testCommonCrawlRegression() throws IOException {
- HWPFDocument document = HWPFTestDataSamples.openSampleFile("ca.kwsymphony.www_education_School_Concert_Seat_Booking_Form_2011-12.doc");
- document.close();
- }
-
// Checks that the pictures table of 61911.doc returns a non-null, empty list.
@Test
void test61911() throws IOException {
- HWPFDocument document = HWPFTestDataSamples.openSampleFile("61911.doc");
-
- PicturesTable picturesTable = document.getPicturesTable();
- List<Picture> pictures = picturesTable.getAllPictures();
- assertNotNull(pictures);
- assertEquals(0, pictures.size());
-
- document.close();
+ try (HWPFDocument document = openSampleFile("61911.doc")) {
+ PicturesTable picturesTable = document.getPicturesTable();
+ List<Picture> pictures = picturesTable.getAllPictures();
+ assertNotNull(pictures);
+ assertEquals(0, pictures.size());
+ }
}
@Test
void test61490CellCountInTable() throws Exception {
- try(HWPFDocument doc = HWPFTestDataSamples.openSampleFile("61490.doc")){
+ try (HWPFDocument doc = openSampleFile("61490.doc")) {
Range range = doc.getRange();
System.out.println("print table");
Table table = tableIter.next();
TableRow row = table.getRow(2);
assertEquals(3, row.numCells());
- for(int cellIdx = 0;cellIdx < row.numCells(); cellIdx++) {
+ for (int cellIdx = 0; cellIdx < row.numCells(); cellIdx++) {
TableCell cell = row.getCell(cellIdx);
- assertEquals("3" + (cellIdx+1), cell.text().trim());
+ assertEquals("3" + (cellIdx + 1), cell.text().trim());
}
}
}
@Test
void test59322() throws Exception {
- try(HWPFDocument doc = HWPFTestDataSamples.openSampleFile("59322.doc")) {
+ try (HWPFDocument doc = openSampleFile("59322.doc")) {
Document document = XMLHelper.newDocumentBuilder().newDocument();
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
wordToHtmlConverter.processDocument(doc);
@Test
void test64132() throws IOException {
- try(HWPFDocument doc = HWPFTestDataSamples.openSampleFile("64132.doc")) {
+ try (HWPFDocument doc = openSampleFile("64132.doc")) {
assertNotNull(doc);
PicturesTable picturesTable = doc.getPicturesTable();
List<Picture> pictures = picturesTable.getAllPictures();
package org.apache.poi.hwpf.usermodel;
+import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.FileOutputStream;
+import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import org.apache.poi.hwpf.model.PicturesTable;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
/**
* Test the picture handling
- *
- * @author Nick Burch
*/
public final class TestPictures {
- /**
- * two jpegs
- */
- @Test
- void testTwoImages() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("two_images.doc");
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
-
- assertNotNull(pics);
- assertEquals(pics.size(), 2);
- for(int i=0; i<pics.size(); i++) {
- Picture pic = pics.get(i);
- assertNotNull(pic.suggestFileExtension());
- assertNotNull(pic.suggestFullFileName());
- }
-
- Picture picA = pics.get(0);
- Picture picB = pics.get(1);
- assertEquals("jpg", picA.suggestFileExtension());
- assertEquals("png", picB.suggestFileExtension());
- }
-
- /**
- * pngs and jpegs
- */
- @Test
- void testDifferentImages() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("testPictures.doc");
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
-
- assertNotNull(pics);
- assertEquals(7, pics.size());
- for(Picture pic : pics) {
- assertNotNull(pic.suggestFileExtension());
- assertNotNull(pic.suggestFullFileName());
- }
-
- assertEquals("jpg", pics.get(0).suggestFileExtension());
- assertEquals("image/jpeg", pics.get(0).getMimeType());
- assertEquals("jpg", pics.get(1).suggestFileExtension());
- assertEquals("image/jpeg", pics.get(1).getMimeType());
- assertEquals("png", pics.get(3).suggestFileExtension());
- assertEquals("image/png", pics.get(3).getMimeType());
- assertEquals("png", pics.get(4).suggestFileExtension());
- assertEquals("image/png", pics.get(4).getMimeType());
- assertEquals("wmf", pics.get(5).suggestFileExtension());
- assertEquals("image/x-wmf", pics.get(5).getMimeType());
- assertEquals("jpg", pics.get(6).suggestFileExtension());
- assertEquals("image/jpeg", pics.get(6).getMimeType());
- }
-
- /**
- * emf image, nice and simple
- */
+ /**
+ * Two embedded images: a jpeg followed by a png.
+ */
@Test
- void testEmfImage() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("vector_image.doc");
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
-
- assertNotNull(pics);
- assertEquals(1, pics.size());
-
- Picture pic = pics.get(0);
- assertNotNull(pic.suggestFileExtension());
- assertNotNull(pic.suggestFullFileName());
- assertTrue(pic.getSize() > 128);
-
- // Check right contents
- byte[] emf = POIDataSamples.getDocumentInstance().readFile("vector_image.emf");
- byte[] pemf = pic.getContent();
- assertEquals(emf.length, pemf.length);
- for(int i=0; i<emf.length; i++) {
- assertEquals(emf[i], pemf[i]);
- }
- }
+ void testTwoImages() throws IOException {
+ // try-with-resources: the document is closed even when an assertion fails
+ try (HWPFDocument doc = openSampleFile("two_images.doc")) {
+ List<Picture> pics = doc.getPicturesTable().getAllPictures();
- @Test
- void testPicturesWithTable() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44603.doc");
+ assertNotNull(pics);
+ // JUnit 5 argument order is (expected, actual)
+ assertEquals(2, pics.size());
+ for (Picture pic : pics) {
+ assertNotNull(pic.suggestFileExtension());
+ assertNotNull(pic.suggestFullFileName());
+ }
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
- assertEquals(2, pics.size());
- }
+ Picture picA = pics.get(0);
+ Picture picB = pics.get(1);
+ assertEquals("jpg", picA.suggestFileExtension());
+ assertEquals("png", picB.suggestFileExtension());
+ }
+ }
+ /**
+ * A mix of image types: jpegs, pngs and a wmf.
+ */
@Test
- void testPicturesInHeader() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("header_image.doc");
-
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
- assertEquals(2, pics.size());
- }
+ void testDifferentImages() throws IOException {
+ // try-with-resources: the document is closed even when an assertion fails
+ try (HWPFDocument doc = openSampleFile("testPictures.doc")) {
+ List<Picture> pics = doc.getPicturesTable().getAllPictures();
- @Test
- void testFastSaved() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("rasp.doc");
+ assertNotNull(pics);
+ assertEquals(7, pics.size());
+ for (Picture pic : pics) {
+ assertNotNull(pic.suggestFileExtension());
+ assertNotNull(pic.suggestFullFileName());
+ }
- doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+ // NOTE(review): the picture at index 2 is not checked here - confirm this is intentional
+ assertEquals("jpg", pics.get(0).suggestFileExtension());
+ assertEquals("image/jpeg", pics.get(0).getMimeType());
+ assertEquals("jpg", pics.get(1).suggestFileExtension());
+ assertEquals("image/jpeg", pics.get(1).getMimeType());
+ assertEquals("png", pics.get(3).suggestFileExtension());
+ assertEquals("image/png", pics.get(3).getMimeType());
+ assertEquals("png", pics.get(4).suggestFileExtension());
+ assertEquals("image/png", pics.get(4).getMimeType());
+ assertEquals("wmf", pics.get(5).suggestFileExtension());
+ assertEquals("image/x-wmf", pics.get(5).getMimeType());
+ assertEquals("jpg", pics.get(6).suggestFileExtension());
+ assertEquals("image/jpeg", pics.get(6).getMimeType());
+ }
}
+ /**
+ * A single emf image; its content must match the reference file byte for byte.
+ */
@Test
- void testFastSaved2() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("o_kurs.doc");
+ void testEmfImage() throws IOException {
+ // try-with-resources: the document is closed even when an assertion fails
+ try (HWPFDocument doc = openSampleFile("vector_image.doc")) {
+ List<Picture> pics = doc.getPicturesTable().getAllPictures();
- doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+ assertNotNull(pics);
+ assertEquals(1, pics.size());
+
+ Picture pic = pics.get(0);
+ assertNotNull(pic.suggestFileExtension());
+ assertNotNull(pic.suggestFullFileName());
+ assertTrue(pic.getSize() > 128);
+
+ // Check right contents
+ byte[] emf = POIDataSamples.getDocumentInstance().readFile("vector_image.emf");
+ byte[] pemf = pic.getContent();
+ assertEquals(emf.length, pemf.length);
+ for (int i = 0; i < emf.length; i++) {
+ // report the offset so a mismatch can be located in the file
+ assertEquals(emf[i], pemf[i], "content differs at byte " + i);
+ }
+ }
}
- @Test
- void testFastSaved3() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ob_is.doc");
+ /**
+ * Each of the listed sample documents is expected to contain exactly two pictures.
+ */
+ @ParameterizedTest
+ @ValueSource(strings = {"Bug44603.doc", "header_image.doc"})
+ void testPictures(String file) throws IOException {
+ try (HWPFDocument document = openSampleFile(file)) {
+ final int pictureCount = document.getPicturesTable().getAllPictures().size();
+ assertEquals(2, pictureCount);
+ }
+ }
- doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+ /**
+ * Loading the pictures of the listed sample documents must not throw.
+ */
+ @ParameterizedTest
+ @ValueSource(strings = {"rasp.doc", "o_kurs.doc", "ob_is.doc"})
+ void testFastSaved(String file) throws IOException {
+ try (HWPFDocument doc = openSampleFile(file)) {
+ // Use a lambda rather than a bound method reference: the receiver of
+ // "doc.getPicturesTable()::getAllPictures" would be evaluated before
+ // assertDoesNotThrow is entered, leaving that call outside the assertion.
+ assertDoesNotThrow(() -> doc.getPicturesTable().getAllPictures());
+ }
}
/**
* When you embed another office document into Word, it stores
- * a rendered "icon" picture of what that document looks like.
+ * a rendered "icon" picture of what that document looks like.
* This image is re-created when you edit the embeded document,
- * then used as-is to speed things up.
+ * then used as-is to speed things up.
* Check that we can properly read one of these
*/
@Test
void testEmbededDocumentIcon() {
- // This file has two embeded excel files, an embeded powerpoint
- // file and an embeded word file, in that order
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc");
-
- // Check we don't break loading the pictures
- doc.getPicturesTable().getAllPictures();
- PicturesTable pictureTable = doc.getPicturesTable();
-
- // Check the text, and its embeded images
- Paragraph p;
- Range r = doc.getRange();
- assertEquals(1, r.numSections());
- assertEquals(5, r.numParagraphs());
-
- p = r.getParagraph(0);
- assertEquals(2, p.numCharacterRuns());
- assertEquals("I have lots of embedded files in me\r", p.text());
+ // This file has two embedded Excel files, an embedded PowerPoint
+ // file and an embedded Word file, in that order
+ HWPFDocument doc = openSampleFile("word_with_embeded.doc");
+
+ // Check that loading all the pictures does not fail
+ doc.getPicturesTable().getAllPictures();
+ PicturesTable pictureTable = doc.getPicturesTable();
+
+ // Check the text and its embedded images (NOTE(review): character run 3 of paragraphs 2-4 is never checked - confirm this is intentional)
+ Paragraph p;
+ Range r = doc.getRange();
+ assertEquals(1, r.numSections());
+ assertEquals(5, r.numParagraphs());
+
+ p = r.getParagraph(0);
+ assertEquals(2, p.numCharacterRuns());
+ assertEquals("I have lots of embedded files in me\r", p.text());
assertFalse(pictureTable.hasPicture(p.getCharacterRun(0)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(1)));
- p = r.getParagraph(1);
- assertEquals(5, p.numCharacterRuns());
- assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
+ p = r.getParagraph(1);
+ assertEquals(5, p.numCharacterRuns());
+ assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
assertFalse(pictureTable.hasPicture(p.getCharacterRun(0)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(1)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(2)));
assertTrue(pictureTable.hasPicture(p.getCharacterRun(3)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(4)));
- p = r.getParagraph(2);
- assertEquals(6, p.numCharacterRuns());
- assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
+ p = r.getParagraph(2);
+ assertEquals(6, p.numCharacterRuns());
+ assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
assertFalse(pictureTable.hasPicture(p.getCharacterRun(0)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(1)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(2)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(4)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(5)));
- p = r.getParagraph(3);
- assertEquals(6, p.numCharacterRuns());
- assertEquals("\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r", p.text());
+ p = r.getParagraph(3);
+ assertEquals(6, p.numCharacterRuns());
+ assertEquals("\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r", p.text());
assertFalse(pictureTable.hasPicture(p.getCharacterRun(0)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(1)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(2)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(4)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(5)));
- p = r.getParagraph(4);
- assertEquals(6, p.numCharacterRuns());
- assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text());
+ p = r.getParagraph(4);
+ assertEquals(6, p.numCharacterRuns());
+ assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text());
assertFalse(pictureTable.hasPicture(p.getCharacterRun(0)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(1)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(2)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(4)));
assertFalse(pictureTable.hasPicture(p.getCharacterRun(5)));
- // Look at the pictures table
- List<Picture> pictures = pictureTable.getAllPictures();
- assertEquals(4, pictures.size());
-
- Picture picture = pictures.get( 0 );
- assertEquals( "emf", picture.suggestFileExtension() );
- assertEquals( "0.emf", picture.suggestFullFileName() );
- assertEquals( "image/x-emf", picture.getMimeType() );
-
- picture = pictures.get( 1 );
- assertEquals( "emf", picture.suggestFileExtension() );
- assertEquals( "469.emf", picture.suggestFullFileName() );
- assertEquals( "image/x-emf", picture.getMimeType() );
-
- picture = pictures.get( 2 );
- assertEquals( "emf", picture.suggestFileExtension() );
- assertEquals( "8c7.emf", picture.suggestFullFileName() );
- assertEquals( "image/x-emf", picture.getMimeType() );
-
- picture = pictures.get( 3 );
- assertEquals( "emf", picture.suggestFileExtension() );
- assertEquals( "10a8.emf", picture.suggestFullFileName() );
- assertEquals( "image/x-emf", picture.getMimeType() );
+ // Look at the pictures table: four emf pictures are expected
+ List<Picture> pictures = pictureTable.getAllPictures();
+ assertEquals(4, pictures.size());
+
+ Picture picture = pictures.get(0);
+ assertEquals("emf", picture.suggestFileExtension());
+ assertEquals("0.emf", picture.suggestFullFileName());
+ assertEquals("image/x-emf", picture.getMimeType());
+
+ picture = pictures.get(1);
+ assertEquals("emf", picture.suggestFileExtension());
+ assertEquals("469.emf", picture.suggestFullFileName());
+ assertEquals("image/x-emf", picture.getMimeType());
+
+ picture = pictures.get(2);
+ assertEquals("emf", picture.suggestFileExtension());
+ assertEquals("8c7.emf", picture.suggestFullFileName());
+ assertEquals("image/x-emf", picture.getMimeType());
+
+ picture = pictures.get(3);
+ assertEquals("emf", picture.suggestFileExtension());
+ assertEquals("10a8.emf", picture.suggestFullFileName());
+ assertEquals("image/x-emf", picture.getMimeType());
}
@Test
- void testEquation()
- {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "equation.doc" );
+ void testEquation() throws IOException {
+ // The single picture of equation.doc must be reported as an EMF.
+ // try-with-resources: the document is closed even when an assertion fails
+ try (HWPFDocument doc = openSampleFile("equation.doc")) {
PicturesTable pictures = doc.getPicturesTable();
final List<Picture> allPictures = pictures.getAllPictures();
- assertEquals( 1, allPictures.size() );
-
- Picture picture = allPictures.get( 0 );
- assertNotNull( picture );
- assertEquals( PictureType.EMF, picture.suggestPictureType() );
- assertEquals( PictureType.EMF.getExtension(),
- picture.suggestFileExtension() );
- assertEquals( PictureType.EMF.getMime(), picture.getMimeType() );
- assertEquals( "0.emf", picture.suggestFullFileName() );
+ assertEquals(1, allPictures.size());
+
+ Picture picture = allPictures.get(0);
+ assertNotNull(picture);
+ assertEquals(PictureType.EMF, picture.suggestPictureType());
+ assertEquals(PictureType.EMF.getExtension(),
+ picture.suggestFileExtension());
+ assertEquals(PictureType.EMF.getMime(), picture.getMimeType());
+ assertEquals("0.emf", picture.suggestFullFileName());
+ }
}
/**
* In word you can have floating or fixed pictures.
* Fixed have a \u0001 in place with an offset to the
- * picture data.
+ * picture data.
* Floating have a \u0008 in place, which references a
- * \u0001 which has the offset. More than one can
- * reference the same \u0001
+ * \u0001 which has the offset. More than one can
+ * reference the same \u0001
*/
@Test
void testFloatingPictures() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("FloatingPictures.doc");
- PicturesTable pictures = doc.getPicturesTable();
-
- // There are 19 images in the picture, but some are
- // duplicate floating ones
- assertEquals(17, pictures.getAllPictures().size());
-
- int plain8s = 0;
- int escher8s = 0;
- int image1s = 0;
-
- Range r = doc.getRange();
- for(int np=0; np < r.numParagraphs(); np++) {
- Paragraph p = r.getParagraph(np);
- for(int nc=0; nc < p.numCharacterRuns(); nc++) {
- CharacterRun cr = p.getCharacterRun(nc);
- if(pictures.hasPicture(cr)) {
- image1s++;
- } else if(pictures.hasEscherPicture(cr)) {
- escher8s++;
- } else if(cr.text().startsWith("\u0008")) {
- plain8s++;
- }
- }
- }
- // Total is 20, as the 4 escher 8s all reference
- // the same regular image
- assertEquals(16, image1s);
- assertEquals(4, escher8s);
- assertEquals(0, plain8s);
+ HWPFDocument doc = openSampleFile("FloatingPictures.doc");
+ PicturesTable pictures = doc.getPicturesTable();
+
+ // There are 19 images in the document, but some are
+ // duplicate floating ones, so the table lists only 17
+ assertEquals(17, pictures.getAllPictures().size());
+
+ int plain8s = 0;
+ int escher8s = 0;
+ int image1s = 0;
+
+ Range r = doc.getRange();
+ for (int np = 0; np < r.numParagraphs(); np++) {
+ Paragraph p = r.getParagraph(np);
+ for (int nc = 0; nc < p.numCharacterRuns(); nc++) {
+ CharacterRun cr = p.getCharacterRun(nc);
+ if (pictures.hasPicture(cr)) {
+ image1s++;
+ } else if (pictures.hasEscherPicture(cr)) {
+ escher8s++;
+ } else if (cr.text().startsWith("\u0008")) {
+ plain8s++;
+ }
+ }
+ }
+ // Total is 20 (16 + 4), as the 4 escher 8s all reference
+ // the same regular image
+ assertEquals(16, image1s);
+ assertEquals(4, escher8s);
+ assertEquals(0, plain8s);
}
- @SuppressWarnings( "deprecation" )
+ @SuppressWarnings("deprecation")
@Test
void testCroppedPictures() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("testCroppedPictures.doc");
+ HWPFDocument doc = openSampleFile("testCroppedPictures.doc");
List<Picture> pics = doc.getPicturesTable().getAllPictures();
assertNotNull(pics);
assertEquals(2, pics.size());
Picture pic1 = pics.get(0);
- assertEquals( -1, pic1.getWidth(), "FIXME: unable to get image width" );
+ assertEquals(-1, pic1.getWidth(), "FIXME: unable to get image width");
assertEquals(270, pic1.getHorizontalScalingFactor());
assertEquals(271, pic1.getVerticalScalingFactor());
assertEquals(12000, pic1.getDxaGoal()); // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
assertEquals(0, pic1.getDyaCropBottom());
Picture pic2 = pics.get(1);
- assertEquals( -1, pic2.getWidth(), "FIXME: unable to get image width" );
+ assertEquals(-1, pic2.getWidth(), "FIXME: unable to get image width");
assertEquals(764, pic2.getHorizontalScalingFactor());
assertEquals(685, pic2.getVerticalScalingFactor());
assertEquals(12000, pic2.getDxaGoal()); // 21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
@Test
void testPictureDetectionWithPNG() {
- HWPFDocument document = HWPFTestDataSamples.openSampleFile("PngPicture.doc");
+ HWPFDocument document = openSampleFile("PngPicture.doc");
PicturesTable pictureTable = document.getPicturesTable();
assertEquals(1, pictureTable.getAllPictures().size());
@Test
void testPictureWithAlternativeText() {
- HWPFDocument document = HWPFTestDataSamples.openSampleFile("Picture_Alternative_Text.doc");
+ HWPFDocument document = openSampleFile("Picture_Alternative_Text.doc");
PicturesTable pictureTable = document.getPicturesTable();
Picture picture = pictureTable.getAllPictures().get(0);
@Disabled("This bug is not fixed yet")
@Test
void test58804_1() throws Exception {
- HWPFDocument docA = HWPFTestDataSamples.openSampleFile("58804_1.doc");
+ HWPFDocument docA = openSampleFile("58804_1.doc");
expectImages(docA, 1);
@Disabled("This bug is not fixed yet")
@Test
void test58804() throws Exception {
- HWPFDocument docA = HWPFTestDataSamples.openSampleFile("58804.doc");
+ HWPFDocument docA = openSampleFile("58804.doc");
expectImages(docA, 7);
package org.apache.poi.hwpf.usermodel;
+import static org.apache.poi.hwpf.HWPFTestDataSamples.openSampleFile;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
*/
@Test
void testListEntryNoListTable() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc");
-
- Range r = doc.getRange();
- for (int x = 0; x < r.numSections(); x++) {
- Section s = r.getSection(x);
- for (int y = 0; y < s.numParagraphs(); y++) {
- s.getParagraph(y);
+ // every paragraph of every section must be retrievable
+ try (HWPFDocument document = openSampleFile("ListEntryNoListTable.doc")) {
+ Range range = document.getRange();
+ for (int sectionIdx = 0; sectionIdx < range.numSections(); sectionIdx++) {
+ Section section = range.getSection(sectionIdx);
+ for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
+ assertNotNull(section.getParagraph(paragraphIdx));
+ }
}
}
-
- doc.close();
}
/**
*/
@Test
void testSprmAIOOB() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
-
- StyleSheet styleSheet = doc.getStyleSheet();
- assertNotNull(styleSheet);
-
- Range r = doc.getRange();
- for (int x = 0; x < r.numSections(); x++) {
- Section s = r.getSection(x);
- for (int y = 0; y < s.numParagraphs(); y++) {
- Paragraph paragraph = s.getParagraph(y);
- assertNotNull(paragraph);
+ try (HWPFDocument document = openSampleFile("AIOOB-Tap.doc")) {
+ // the style sheet must be readable
+ assertNotNull(document.getStyleSheet());
+
+ // ... and so must every paragraph of every section
+ Range range = document.getRange();
+ for (int sectionIdx = 0; sectionIdx < range.numSections(); sectionIdx++) {
+ Section section = range.getSection(sectionIdx);
+ for (int paragraphIdx = 0; paragraphIdx < section.numParagraphs(); paragraphIdx++) {
+ assertNotNull(section.getParagraph(paragraphIdx));
+ }
}
}
- doc.close();
}
/**
*/
@Test
void testTableCellLastParagraph() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
- Range r = doc.getRange();
- assertEquals(6, r.numParagraphs());
- assertEquals(0, r.getStartOffset());
- assertEquals(87, r.getEndOffset());
-
- // Paragraph with table
- Paragraph p = r.getParagraph(0);
- assertEquals(0, p.getStartOffset());
- assertEquals(20, p.getEndOffset());
-
- // Check a few bits of the table directly
- assertEquals("One paragraph is ok\7", r.getParagraph(0).text());
- assertEquals("First para is ok\r", r.getParagraph(1).text());
- assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text());
- assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
- assertEquals("\7", r.getParagraph(4).text());
- assertEquals("\r", r.getParagraph(5).text());
-
- // Get the table
- Table t = r.getTable(p);
-
- // get the only row
- assertEquals(1, t.numRows());
- TableRow row = t.getRow(0);
-
- // sanity check our row
- assertEquals(5, row.numParagraphs());
- assertEquals(0, row._parStart);
- assertEquals(5, row._parEnd);
- assertEquals(0, row.getStartOffset());
- assertEquals(86, row.getEndOffset());
-
- // get the first cell
- TableCell cell = row.getCell(0);
- // First cell should have one paragraph
- assertEquals(1, cell.numParagraphs());
- assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
- assertEquals(0, cell._parStart);
- assertEquals(1, cell._parEnd);
- assertEquals(0, cell.getStartOffset());
- assertEquals(20, cell.getEndOffset());
-
- // get the second
- cell = row.getCell(1);
- // Second cell should be detected as having two paragraphs
- assertEquals(2, cell.numParagraphs());
- assertEquals("First para is ok\r", cell.getParagraph(0).text());
- assertEquals("Second paragraph is skipped\7",
+ try (HWPFDocument doc = openSampleFile("Bug44292.doc")) {
+ Range r = doc.getRange();
+ assertEquals(6, r.numParagraphs());
+ assertEquals(0, r.getStartOffset());
+ assertEquals(87, r.getEndOffset());
+
+ // First paragraph, which is later used to look up the table
+ Paragraph p = r.getParagraph(0);
+ assertEquals(0, p.getStartOffset());
+ assertEquals(20, p.getEndOffset());
+
+ // Check the text of all six paragraphs directly on the range
+ assertEquals("One paragraph is ok\7", r.getParagraph(0).text());
+ assertEquals("First para is ok\r", r.getParagraph(1).text());
+ assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text());
+ assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
+ assertEquals("\7", r.getParagraph(4).text());
+ assertEquals("\r", r.getParagraph(5).text());
+
+ // Get the table that starts at the first paragraph
+ Table t = r.getTable(p);
+
+ // get the only row
+ assertEquals(1, t.numRows());
+ TableRow row = t.getRow(0);
+
+ // sanity check our row: paragraph indexes and character offsets
+ assertEquals(5, row.numParagraphs());
+ assertEquals(0, row._parStart);
+ assertEquals(5, row._parEnd);
+ assertEquals(0, row.getStartOffset());
+ assertEquals(86, row.getEndOffset());
+
+ // get the first cell
+ TableCell cell = row.getCell(0);
+ // First cell should have one paragraph
+ assertEquals(1, cell.numParagraphs());
+ assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
+ assertEquals(0, cell._parStart);
+ assertEquals(1, cell._parEnd);
+ assertEquals(0, cell.getStartOffset());
+ assertEquals(20, cell.getEndOffset());
+
+ // get the second cell
+ cell = row.getCell(1);
+ // Second cell should be detected as having two paragraphs
+ assertEquals(2, cell.numParagraphs());
+ assertEquals("First para is ok\r", cell.getParagraph(0).text());
+ assertEquals("Second paragraph is skipped\7",
cell.getParagraph(1).text());
- assertEquals(1, cell._parStart);
- assertEquals(3, cell._parEnd);
- assertEquals(20, cell.getStartOffset());
- assertEquals(65, cell.getEndOffset());
-
- // get the last cell
- cell = row.getCell(2);
- // Last cell should have one paragraph
- assertEquals(1, cell.numParagraphs());
- assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
- assertEquals(3, cell._parStart);
- assertEquals(4, cell._parEnd);
- assertEquals(65, cell.getStartOffset());
- assertEquals(85, cell.getEndOffset());
-
- doc.close();
+ assertEquals(1, cell._parStart);
+ assertEquals(3, cell._parEnd);
+ assertEquals(20, cell.getStartOffset());
+ assertEquals(65, cell.getEndOffset());
+
+ // get the last cell
+ cell = row.getCell(2);
+ // Last cell should have one paragraph
+ assertEquals(1, cell.numParagraphs());
+ assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
+ assertEquals(3, cell._parStart);
+ assertEquals(4, cell._parEnd);
+ assertEquals(65, cell.getStartOffset());
+ assertEquals(85, cell.getEndOffset());
+ }
}
@Test
void testRangeDelete() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
-
- Range range = doc.getRange();
- int numParagraphs = range.numParagraphs();
+ try (HWPFDocument doc = openSampleFile("Bug28627.doc")) {
+ Range range = doc.getRange();
+ int numParagraphs = range.numParagraphs();
- int totalLength = 0, deletedLength = 0;
+ int totalLength = 0, deletedLength = 0;
- for (int i = 0; i < numParagraphs; i++) {
- Paragraph para = range.getParagraph(i);
- String text = para.text();
+ // sum up the text length and delete the paragraph carrying the marker
+ for (int i = 0; i < numParagraphs; i++) {
+ Paragraph para = range.getParagraph(i);
+ String text = para.text();
- totalLength += text.length();
- if (text.contains("{delete me}")) {
- para.delete();
- deletedLength = text.length();
+ totalLength += text.length();
+ if (text.contains("{delete me}")) {
+ para.delete();
+ deletedLength = text.length();
+ }
}
- }
-
- // check the text length after deletion
- int newLength = 0;
- range = doc.getRange();
- numParagraphs = range.numParagraphs();
- for (int i = 0; i < numParagraphs; i++) {
- Paragraph para = range.getParagraph(i);
- String text = para.text();
+ // check the text length after deletion
+ int newLength = 0;
+ range = doc.getRange();
+ numParagraphs = range.numParagraphs();
- newLength += text.length();
- }
+ for (int i = 0; i < numParagraphs; i++) {
+ Paragraph para = range.getParagraph(i);
+ String text = para.text();
- assertEquals(newLength, totalLength - deletedLength);
+ newLength += text.length();
+ }
- doc.close();
+ // JUnit 5 argument order is (expected, actual): the computed remainder
+ // is the expectation, the re-read length is the value under test
+ assertEquals(totalLength - deletedLength, newLength);
+ }
}
/**
*/
@Test
void testEncryptedFile() {
- assertThrows(EncryptedDocumentException.class, () -> HWPFTestDataSamples.openSampleFile("PasswordProtected.doc"));
+ // opening the password protected sample must be rejected
+ assertThrows(
+ EncryptedDocumentException.class,
+ () -> openSampleFile("PasswordProtected.doc")
+ );
}
@Test
void testWriteProperties() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
+ HWPFDocument doc = openSampleFile("SampleDoc.doc");
assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
// Write and read
*/
@Test
void testReadParagraphsAfterReplaceText() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
+ HWPFDocument doc = openSampleFile("Bug45269.doc");
Range range = doc.getRange();
String toFind = "campo1";
}
}
- doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
+ doc = openSampleFile("Bug45269.doc");
range = doc.getRange();
// check replace with shorter text
@SuppressWarnings("deprecation")
@Test
void testProblemHeaderStories49936() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("HeaderFooterProblematic.doc");
+ HWPFDocument doc = openSampleFile("HeaderFooterProblematic.doc");
HeaderStories hs = new HeaderStories(doc);
assertEquals("", hs.getFirstHeader());
*/
@Test
void testParagraphPAPXNoParent45877() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45877.doc");
+ HWPFDocument doc = openSampleFile("Bug45877.doc");
assertEquals(17, doc.getRange().numParagraphs());
assertEquals("First paragraph\r",
*/
@Test
void testTableIterator() throws IOException {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("simple-table2.doc");
+ HWPFDocument doc = openSampleFile("simple-table2.doc");
Range r = doc.getRange();
// Check the text is as we'd expect
*/
@Test
void testOpen() throws IOException {
- openSampleFile(illustrativeDocFile).close();
+ // the sample must open and expose the expected number of paragraphs
+ try (HWPFDocument document = openSampleFile(illustrativeDocFile)) {
+ final int paragraphCount = document.getParagraphTable().getParagraphs().size();
+ assertEquals(5, paragraphCount);
+ }
}
/**
// u201c and u201d are "smart-quotes"
private static final String originalText =
- "It is used to confirm that text insertion works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.\r";
+ "It is used to confirm that text insertion works even if Unicode characters (such as \u201c\u2014\u201d " +
+ "(U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present.\r";
private static final String textToInsert = "Look at me! I'm cool! ";
private static final int insertionPoint = 122;
*/
@Test
void testOpen() throws IOException {
- openSampleFile(illustrativeDocFile).close();
+ // the sample must open and expose the expected number of paragraphs
+ try (HWPFDocument document = openSampleFile(illustrativeDocFile)) {
+ final int paragraphCount = document.getParagraphTable().getParagraphs().size();
+ assertEquals(3, paragraphCount);
+ }
}
/**