--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf.usermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.AssertionFailedError;
+import junit.framework.TestCase;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.HWPFOldDocument;
+import org.apache.poi.hwpf.HWPFTestDataSamples;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.hwpf.model.FieldsDocumentPart;
+import org.apache.poi.hwpf.model.PlexOfField;
+import org.apache.poi.util.IOUtils;
+
+/**
+ * Test different problems reported in Apache Bugzilla
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
+ */
+public class TestBugs extends TestCase
+{
+
+ /**
+  * Asserts that two ranges have the same paragraph and table layout.
+  * <p>
+  * The paragraph counts must match, and each pair of paragraphs at the same
+  * index must agree on text, on the in-table flag and on the row-end flag.
+  * When both paragraphs are in a table, the tables looked up from them must
+  * agree on row count and paragraph count; if the table lookup throws, that
+  * pair is skipped without failing the assertion.
+  *
+  * @param expected range used as the reference
+  * @param actual range compared against the reference
+  */
+ private static void assertTableStructures( Range expected, Range actual ) {
+     final int paragraphCount = expected.numParagraphs();
+     assertEquals( paragraphCount, actual.numParagraphs() );
+
+     for ( int index = 0; index < paragraphCount; index++ ) {
+         Paragraph reference = expected.getParagraph( index );
+         Paragraph candidate = actual.getParagraph( index );
+
+         assertEquals( reference.text(), candidate.text() );
+
+         String flagMessage = "Diffent isInTable flags for paragraphs #"
+                 + index + " -- " + reference + " -- " + candidate + ".";
+         assertEquals( flagMessage, reference.isInTable(),
+                 candidate.isInTable() );
+         assertEquals( reference.isTableRowEnd(), candidate.isTableRowEnd() );
+
+         // The table comparison only applies when both paragraphs are in a table.
+         if ( !reference.isInTable() || !candidate.isInTable() ) {
+             continue;
+         }
+
+         Table referenceTable;
+         Table candidateTable;
+         try {
+             referenceTable = expected.getTable( reference );
+             candidateTable = actual.getTable( candidate );
+         } catch ( Exception ignored ) {
+             // Table lookup failed for this pair; move on to the next paragraph.
+             continue;
+         }
+
+         assertEquals( referenceTable.numRows(), candidateTable.numRows() );
+         assertEquals( referenceTable.numParagraphs(),
+                 candidateTable.numParagraphs() );
+     }
+ }
+
+ /**
+  * Signals that a bug expected to still fail no longer does.
+  * <p>
+  * Always throws an {@link Error}; it never returns normally.
+  *
+  * @param bugzillaId identifier of the bug, embedded in the error message
+  */
+ static void fixed( String bugzillaId ) {
+     StringBuilder message = new StringBuilder();
+     message.append( "Bug " ).append( bugzillaId );
+     message.append( " seems to be fixed. " );
+     message.append( "Please resolve the issue in Bugzilla and remove fail() from the test" );
+     throw new Error( message.toString() );
+ }
+
+ /**
+  * Inserts a table of the given size into an empty sample document, writes a
+  * running number into the first paragraph of every cell, and runs sanity
+  * checks on the affected objects along the way. Finally verifies that each
+  * number can be found in the range text at or after the position where the
+  * previous number was found.
+  *
+  * @param rows number of table rows to insert
+  * @param columns number of table columns to insert
+  */
+ private static void test47563_insertTable( int rows, int columns ) {
+     // An existing empty document serves as the starting point.
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "empty.doc" );
+     Range documentRange = document.getRange();
+
+     TableProperties properties = new TableProperties( (short) columns );
+     Table inserted = documentRange.insertBefore( properties, rows );
+     inserted.sanityCheck();
+     documentRange.sanityCheck();
+
+     for ( int r = 0; r < inserted.numRows(); r++ ) {
+         TableRow currentRow = inserted.getRow( r );
+         currentRow.sanityCheck();
+
+         for ( int c = 0; c < currentRow.numCells(); c++ ) {
+             TableCell currentCell = currentRow.getCell( c );
+             currentCell.sanityCheck();
+
+             Paragraph firstParagraph = currentCell.getParagraph( 0 );
+             firstParagraph.sanityCheck();
+
+             int cellNumber = r * currentRow.numCells() + c;
+             firstParagraph.insertBefore( String.valueOf( cellNumber ) );
+
+             // Re-validate from the innermost object outwards.
+             firstParagraph.sanityCheck();
+             currentCell.sanityCheck();
+             currentRow.sanityCheck();
+             inserted.sanityCheck();
+             documentRange.sanityCheck();
+         }
+     }
+
+     String rangeText = documentRange.text();
+     final int cellCount = rows * columns;
+     int searchFrom = 0;
+     for ( int number = 0; number < cellCount; number++ ) {
+         String marker = Integer.toString( number );
+         int position = rangeText.indexOf( marker, searchFrom );
+         assertFalse( position == -1 );
+         searchFrom = position;
+     }
+ }
+
+ /**
+  * Bug 33519 - HWPF fails to read a file.
+  * <p>
+  * Opens the sample document and runs text extraction on it; the extracted
+  * text itself is not inspected.
+  */
+ public void test33519() {
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug33519.doc" );
+     new WordExtractor( document ).getText();
+ }
+
+ /**
+  * Bug 34898 - WordExtractor doesn't read the whole string from the file.
+  * <p>
+  * The trimmed extracted text must equal the expected six-character string.
+  */
+ public void test34898() {
+     final String expectedText = "\u30c7\u30a3\u30ec\u30af\u30c8\u30ea";
+
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug34898.doc" );
+     String extractedText = new WordExtractor( document ).getText().trim();
+
+     assertEquals( expectedText, extractedText );
+ }
+
+ /**
+  * Bug 44431 - HWPFDocument.write destroys fields.
+  * <p>
+  * Writes the sample document out, reads it back, and requires footer text,
+  * header text, paragraph texts and the full text to be unchanged.
+  */
+ public void test44431() {
+     HWPFDocument original = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
+     WordExtractor before = new WordExtractor( original );
+
+     HWPFDocument reloaded = HWPFTestDataSamples.writeOutAndReadBack( original );
+     WordExtractor after = new WordExtractor( reloaded );
+
+     assertEquals( before.getFooterText(), after.getFooterText() );
+     assertEquals( before.getHeaderText(), after.getHeaderText() );
+
+     String paragraphsBefore = Arrays.toString( before.getParagraphText() );
+     String paragraphsAfter = Arrays.toString( after.getParagraphText() );
+     assertEquals( paragraphsBefore, paragraphsAfter );
+
+     assertEquals( before.getText(), after.getText() );
+ }
+
+ /**
+  * Bug 45473 - HWPF cannot read file after save.
+  * <p>
+  * Compares the trimmed extracted text before and after a write-out and
+  * read-back cycle, with newline characters removed from both sides.
+  */
+ public void test45473() {
+     HWPFDocument source = HWPFTestDataSamples.openSampleFile( "Bug45473.doc" );
+     String sourceText = new WordExtractor( source ).getText().trim();
+
+     HWPFDocument roundTripped = HWPFTestDataSamples.writeOutAndReadBack( source );
+     String roundTrippedText = new WordExtractor( roundTripped ).getText().trim();
+
+     // Saving may change line separators, so newlines are dropped before comparing.
+     assertEquals( sourceText.replace( "\n", "" ),
+             roundTrippedText.replace( "\n", "" ) );
+ }
+
+ /**
+  * Bug 46220 - images are not properly extracted.
+  * <p>
+  * Expects exactly four pictures whose raw content matches, in order, the
+  * reference MD5 checksums listed below.
+  */
+ public void test46220() {
+     // Reference checksums as given in Bugzilla.
+     final String[] expectedDigests = {
+         "851be142bce6d01848e730cb6903f39e",
+         "7fc6d8fb58b09ababd036d10a0e8c039",
+         "a7dc644c40bc2fbf17b2b62d07f99248",
+         "72d07b8db5fad7099d90bc4c304b4666"
+     };
+
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug46220.doc" );
+     List<Picture> pictures = document.getPicturesTable().getAllPictures();
+     assertEquals( 4, pictures.size() );
+
+     int index = 0;
+     for ( Picture picture : pictures ) {
+         // MD5 is computed with Apache Commons Codec.
+         String actualDigest = DigestUtils.md5Hex( picture.getRawContent() );
+         assertEquals( expectedDigests[index], actualDigest );
+         index++;
+     }
+ }
+
+ /**
+  * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
+  * missing.
+  * <p>
+  * The trimmed extracted text must contain each of the expected fragments.
+  */
+ public void test46817() {
+     final String[] expectedFragments = {
+         "Nazwa wykonawcy",
+         "kujawsko-pomorskie",
+         "ekomel@ekomel.com.pl"
+     };
+
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
+     String extracted = new WordExtractor( document ).getText().trim();
+
+     for ( String fragment : expectedFragments ) {
+         assertTrue( extracted.contains( fragment ) );
+     }
+ }
+
+ /**
+  * [FAILING] Bug 47286 - Word documents saves in wrong format if source
+  * contains form elements.
+  * <p>
+  * Round-trips the sample through write-out and read-back, then compares the
+  * extracted text (newlines removed), the number of character text runs, the
+  * number of main-document fields and the table structure.
+  *
+  * @throws IOException if an I/O error is reported while processing the
+  *         documents
+  */
+ public void test47286() throws IOException {
+     HWPFDocument source = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" );
+     String sourceText = new WordExtractor( source ).getText().trim();
+
+     HWPFDocument reloaded = HWPFTestDataSamples.writeOutAndReadBack( source );
+     String reloadedText = new WordExtractor( reloaded ).getText().trim();
+
+     // Line separators may differ after saving, so newlines are dropped.
+     assertEquals( sourceText.replace( "\n", "" ),
+             reloadedText.replace( "\n", "" ) );
+
+     int sourceRuns = source.getCharacterTable().getTextRuns().size();
+     int reloadedRuns = reloaded.getCharacterTable().getTextRuns().size();
+     assertEquals( sourceRuns, reloadedRuns );
+
+     List<PlexOfField> sourceFields = source.getFieldsTables()
+             .getFieldsPLCF( FieldsDocumentPart.MAIN );
+     List<PlexOfField> reloadedFields = reloaded.getFieldsTables()
+             .getFieldsPLCF( FieldsDocumentPart.MAIN );
+     assertEquals( sourceFields.size(), reloadedFields.size() );
+
+     assertTableStructures( source.getRange(), reloaded.getRange() );
+ }
+
+ /**
+ * [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
+ * CharacterRun.replaceText()
+ * <p>
+ * Walks every character run of every paragraph of every section in the
+ * sample document. When a run contains no "FORMTEXT " marker, the first
+ * placeholder in the run (if any) is replaced with the next unused entry of
+ * the value list; otherwise, for each marker found, the first placeholder at
+ * or after that marker is replaced. A run is updated through
+ * CharacterRun.replaceText() only when at least one replacement was made.
+ * <p>
+ * The final assertions require "1-1" and "1-12" to be present in the range
+ * text and "1-13" and "1-15" to be absent. Note that the "1-1" check is also
+ * satisfied by "1-10", "1-11" or "1-12", since it is a substring test.
+ */
+ public void test47287()
+ {
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
+ String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
+ "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
+ int usedVal = 0; // index of the next unused entry in values
+ String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002"; // five U+2002 characters
+ Range r = doc.getRange();
+ for ( int x = 0; x < r.numSections(); x++ )
+ {
+ Section s = r.getSection( x );
+ for ( int y = 0; y < s.numParagraphs(); y++ )
+ {
+ Paragraph p = s.getParagraph( y );
+
+ for ( int z = 0; z < p.numCharacterRuns(); z++ )
+ {
+ boolean isFound = false; // set once any placeholder in this run is replaced
+
+ // current character run
+ CharacterRun run = p.getCharacterRun( z );
+ // text of the run; modified below as placeholders are replaced
+ String text = run.text();
+ String oldText = text; // unmodified text, passed to replaceText below
+ int c = text.indexOf( "FORMTEXT " ); // first marker in this run, or -1
+ if ( c < 0 )
+ {
+ int k = text.indexOf( PLACEHOLDER );
+ if ( k >= 0 )
+ {
+ text = text.substring( 0, k ) + values[usedVal]
+ + text.substring( k + PLACEHOLDER.length() );
+ usedVal++;
+ isFound = true;
+ }
+ }
+ else
+ {
+ for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
+ + "FORMTEXT ".length() ) ) // next marker is searched in the already modified text
+ {
+ int k = text.indexOf( PLACEHOLDER, c ); // first placeholder at or after the marker
+ if ( k >= 0 )
+ {
+ text = text.substring( 0, k )
+ + values[usedVal]
+ + text.substring( k
+ + PLACEHOLDER.length() );
+ usedVal++;
+ isFound = true;
+ }
+ }
+ }
+ if ( isFound )
+ {
+ run.replaceText( oldText, text, 0 );
+ }
+
+ }
+ }
+ }
+
+ String docText = r.text();
+
+ assertTrue( docText.contains( "1-1" ) );
+ assertTrue( docText.contains( "1-12" ) ); // at least twelve values were inserted
+
+ assertFalse( docText.contains( "1-13" ) ); // the thirteenth value must not appear
+ assertFalse( docText.contains( "1-15" ) );
+ }
+
+ /**
+  * [RESOLVED FIXED] Bug 47563 - Exception when working with table.
+  * <p>
+  * Runs the table-insertion check for a fixed list of row/column sizes.
+  */
+ public void test47563() {
+     // Each entry is { rows, columns }.
+     final int[][] tableSizes = {
+         { 1, 5 }, { 1, 6 },
+         { 5, 1 }, { 6, 1 },
+         { 2, 2 }, { 3, 2 },
+         { 2, 3 }, { 3, 3 }
+     };
+
+     for ( int[] size : tableSizes ) {
+         test47563_insertTable( size[0], size[1] );
+     }
+ }
+
+ /**
+  * [FAILING] Bug 47731 - Word Extractor considers text copied from some
+  * website as an embedded object.
+  * <p>
+  * The expected sentence is currently assumed to be missing from the
+  * extracted text: the resulting assertion failure is caught and ignored. If
+  * the sentence is found, {@code fixed} is called and raises an error.
+  */
+ public void test47731() throws Exception {
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" );
+     String extracted = new WordExtractor( document ).getText();
+     boolean sentenceFound = extracted
+             .contains( "Soak the rice in water for three to four hours" );
+
+     try {
+         assertTrue( sentenceFound );
+         fixed( "47731" );
+     } catch ( AssertionFailedError expected ) {
+         // The assertion is expected to fail while the bug is open.
+     }
+ }
+
+ /**
+  * Bug 47742 - text extracted by WordExtractor is broken.
+  * <p>
+  * Compares the text extracted from the Word sample with a reference text
+  * file that was saved from the same document using UTF-8 encoding. The
+  * first character of the reference text (the Unicode marker) is dropped
+  * before the comparison.
+  *
+  * @throws Exception if the reference file cannot be read or closed
+  */
+ public void test47742() throws Exception {
+     // (1) extract text from the MS Word document via POI
+     HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47742.doc" );
+     String foundText = new WordExtractor( doc ).getText();
+
+     // (2) read the reference text (retrieved by saving the Word document
+     // as a text file using encoding UTF-8)
+     byte[] expectedBytes;
+     InputStream is = POIDataSamples.getDocumentInstance()
+             .openResourceAsStream( "Bug47742-text.txt" );
+     try {
+         expectedBytes = IOUtils.toByteArray( is );
+     } finally {
+         // Always release the resource stream, even if reading fails.
+         is.close();
+     }
+
+     String expectedText = new String( expectedBytes, "utf-8" )
+             .substring( 1 ); // strip off the Unicode marker
+
+     assertEquals( expectedText, foundText );
+ }
+
+ /**
+  * [FAILING] Bug 47958 - Exception during Escher walk of pictures.
+  * <p>
+  * Prints the suggested file name of every picture. An exception is
+  * currently expected and ignored; if none occurs, {@code fixed} is called
+  * and raises an error.
+  */
+ public void test47958() {
+     HWPFDocument document = HWPFTestDataSamples.openSampleFile( "Bug47958.doc" );
+     try {
+         List<Picture> pictures = document.getPicturesTable().getAllPictures();
+         for ( int i = 0; i < pictures.size(); i++ ) {
+             System.out.println( pictures.get( i ).suggestFullFileName() );
+         }
+         fixed( "47958" );
+     } catch ( Exception expected ) {
+         // An exception is expected while the bug is open.
+     }
+ }
+
+ /**
+  * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
+  * formatting).
+  * <p>
+  * Compares the range text before and after a write-out and read-back cycle,
+  * with carriage returns turned into newlines and double newlines collapsed,
+  * then compares the table structure of both ranges.
+  */
+ public void test48065() {
+     HWPFDocument source = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" );
+     HWPFDocument reloaded = HWPFTestDataSamples.writeOutAndReadBack( source );
+
+     Range expected = source.getRange();
+     Range actual = reloaded.getRange();
+
+     String expectedText = expected.text().replace( "\r", "\n" )
+             .replace( "\n\n", "\n" );
+     String actualText = actual.text().replace( "\r", "\n" )
+             .replace( "\n\n", "\n" );
+     assertEquals( expectedText, actualText );
+
+     assertTableStructures( expected, actual );
+ }
+
+ /**
+  * Bug 49933 - the text extracted from the old-format sample by
+  * Word6Extractor must contain "best.wine.jump.ru".
+  */
+ public void test49933() {
+     HWPFOldDocument oldDocument = HWPFTestDataSamples
+             .openOldSampleFile( "Bug49933.doc" );
+     String extracted = new Word6Extractor( oldDocument ).getText();
+
+     assertTrue( extracted.contains( "best.wine.jump.ru" ) );
+ }
+
+ /**
+  * Bug 50936 - HWPF fails to read a file.
+  * <p>
+  * Only opens the sample document; nothing is asserted on the result.
+  */
+ public void test50936() {
+     final String sampleName = "Bug50936.doc";
+     HWPFTestDataSamples.openSampleFile( sampleName );
+ }
+
+ /**
+  * [FAILING] Bug 50955 - error while retrieving the text file.
+  * <p>
+  * Opening the old-format sample and extracting its text is currently
+  * expected to throw an exception, which is ignored; if none occurs,
+  * {@code fixed} is called and raises an error.
+  */
+ public void test50955() {
+     try {
+         HWPFOldDocument oldDocument = HWPFTestDataSamples
+                 .openOldSampleFile( "Bug50955.doc" );
+         new Word6Extractor( oldDocument ).getText();
+
+         fixed( "50955" );
+     } catch ( Exception expected ) {
+         // An exception is expected while the bug is open.
+     }
+ }
+
+ /**
+  * Bug 51524 - PapBinTable constructor is slow.
+  * <p>
+  * Only opens the sample document from its archive; nothing is asserted on
+  * the result.
+  */
+ public void test51524() {
+     final String archiveName = "Bug51524.zip";
+     HWPFTestDataSamples.openSampleFileFromArchive( archiveName );
+ }
+
+}
package org.apache.poi.hwpf.usermodel;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
-import junit.framework.AssertionFailedError;
-import org.apache.commons.codec.digest.DigestUtils;
import org.apache.poi.EncryptedDocumentException;
-import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
-import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.HWPFTestCase;
import org.apache.poi.hwpf.HWPFTestDataSamples;
-import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.hwpf.model.FieldsDocumentPart;
-import org.apache.poi.hwpf.model.PlexOfField;
import org.apache.poi.hwpf.model.StyleSheet;
-import org.apache.poi.util.IOUtils;
/**
* Test various problem documents
assertEquals("Row 3/Cell 3\u0007", cell.text());
}
- static void fixed(String bugzillaId) {
- throw new Error("Bug " + bugzillaId + " seems to be fixed. " +
- "Please resolve the issue in Bugzilla and remove fail() from the test");
- }
-
- /**
- * Bug 33519 - HWPF fails to read a file
- */
- public void test33519() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug33519.doc");
- WordExtractor extractor = new WordExtractor(doc);
- String text = extractor.getText();
- }
-
- /**
- * Bug 34898 - WordExtractor doesn't read the whole string from the file
- */
- public void test34898() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug34898.doc");
- WordExtractor extractor = new WordExtractor(doc);
- assertEquals("\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor.getText().trim());
- }
-
- /**
- * Bug 44331 - HWPFDocument.write destroys fields
- */
- public void test44431()
- {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug44431.doc" );
- WordExtractor extractor1 = new WordExtractor( doc1 );
-
- HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
- WordExtractor extractor2 = new WordExtractor( doc2 );
-
- assertEquals( extractor1.getFooterText(), extractor2.getFooterText() );
- assertEquals( extractor1.getHeaderText(), extractor2.getHeaderText() );
- assertEquals( Arrays.toString( extractor1.getParagraphText() ),
- Arrays.toString( extractor2.getParagraphText() ) );
-
- assertEquals( extractor1.getText(), extractor2.getText() );
- }
-
- /**
- * [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
- * missing
- */
- public void test46817()
- {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
- WordExtractor extractor = new WordExtractor( doc );
- String text = extractor.getText().trim();
-
- assertTrue( text.contains( "Nazwa wykonawcy" ) );
- assertTrue( text.contains( "kujawsko-pomorskie" ) );
- assertTrue( text.contains( "ekomel@ekomel.com.pl" ) );
- }
-
- /**
- * Bug 46220 - images are not properly extracted
- */
- public void test46220() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
- // reference checksums as in Bugzilla
- String[] md5 = {
- "851be142bce6d01848e730cb6903f39e",
- "7fc6d8fb58b09ababd036d10a0e8c039",
- "a7dc644c40bc2fbf17b2b62d07f99248",
- "72d07b8db5fad7099d90bc4c304b4666"
- };
- List<Picture> pics = doc.getPicturesTable().getAllPictures();
- assertEquals(4, pics.size());
- for (int i = 0; i < pics.size(); i++) {
- Picture pic = pics.get(i);
- byte[] data = pic.getRawContent();
- // use Apache Commons Codec utils to compute md5
- assertEquals(md5[i], DigestUtils.md5Hex(data));
- }
- }
-
- /**
- * Bug 45473 - HWPF cannot read file after save
- */
- public void test45473() {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
- String text1 = new WordExtractor(doc1).getText().trim();
-
- HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
- String text2 = new WordExtractor(doc2).getText().trim();
-
- // the text in the saved document has some differences in line separators but we tolerate that
- assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
- }
-
- /**
- * [FAILING] Bug 47286 - Word documents saves in wrong format if source
- * contains form elements
- *
- * @throws IOException
- */
- public void test47286() throws IOException
- {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug47286.doc" );
- String text1 = new WordExtractor( doc1 ).getText().trim();
-
- HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
- String text2 = new WordExtractor( doc2 ).getText().trim();
-
- // the text in the saved document has some differences in line
- // separators but we tolerate that
- assertEquals( text1.replaceAll( "\n", "" ), text2.replaceAll( "\n", "" ) );
-
- assertEquals( doc1.getCharacterTable().getTextRuns().size(), doc2
- .getCharacterTable().getTextRuns().size() );
-
- List<PlexOfField> expectedFields = doc1.getFieldsTables()
- .getFieldsPLCF( FieldsDocumentPart.MAIN );
- List<PlexOfField> actualFields = doc2.getFieldsTables().getFieldsPLCF(
- FieldsDocumentPart.MAIN );
- assertEquals( expectedFields.size(), actualFields.size() );
-
- assertTableStructures( doc1.getRange(), doc2.getRange() );
- }
-
- /**
- * [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
- * CharacterRun.replaceText()
- */
- public void test47287()
- {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
- String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
- "1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
- int usedVal = 0;
- String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
- Range r = doc.getRange();
- for ( int x = 0; x < r.numSections(); x++ )
- {
- Section s = r.getSection( x );
- for ( int y = 0; y < s.numParagraphs(); y++ )
- {
- Paragraph p = s.getParagraph( y );
-
- for ( int z = 0; z < p.numCharacterRuns(); z++ )
- {
- boolean isFound = false;
-
- // character run
- CharacterRun run = p.getCharacterRun( z );
- // character run text
- String text = run.text();
- String oldText = text;
- int c = text.indexOf( "FORMTEXT " );
- if ( c < 0 )
- {
- int k = text.indexOf( PLACEHOLDER );
- if ( k >= 0 )
- {
- text = text.substring( 0, k ) + values[usedVal]
- + text.substring( k + PLACEHOLDER.length() );
- usedVal++;
- isFound = true;
- }
- }
- else
- {
- for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
- + "FORMTEXT ".length() ) )
- {
- int k = text.indexOf( PLACEHOLDER, c );
- if ( k >= 0 )
- {
- text = text.substring( 0, k )
- + values[usedVal]
- + text.substring( k
- + PLACEHOLDER.length() );
- usedVal++;
- isFound = true;
- }
- }
- }
- if ( isFound )
- {
- run.replaceText( oldText, text, 0 );
- }
-
- }
- }
- }
-
- String docText = r.text();
-
- assertTrue( docText.contains( "1-1" ) );
- assertTrue( docText.contains( "1-12" ) );
-
- assertFalse( docText.contains( "1-13" ) );
- assertFalse( docText.contains( "1-15" ) );
- }
-
- private static void insertTable( int rows, int columns )
- {
- // POI apparently can't create a document from scratch,
- // so we need an existing empty dummy document
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );
-
- Range range = doc.getRange();
- Table table = range.insertBefore(
- new TableProperties( (short) columns ), rows );
- table.sanityCheck();
- range.sanityCheck();
-
- for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
- {
- TableRow row = table.getRow( rowIdx );
- row.sanityCheck();
- for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
- {
- TableCell cell = row.getCell( colIdx );
- cell.sanityCheck();
-
- Paragraph par = cell.getParagraph( 0 );
- par.sanityCheck();
-
- par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) );
-
- par.sanityCheck();
- cell.sanityCheck();
- row.sanityCheck();
- table.sanityCheck();
- range.sanityCheck();
- }
- }
-
- String text = range.text();
- int mustBeAfter = 0;
- for ( int i = 0; i < rows * columns; i++ )
- {
- int next = text.indexOf( Integer.toString( i ), mustBeAfter );
- assertFalse( next == -1 );
- mustBeAfter = next;
- }
- }
-
- /**
- * [RESOLVED FIXED] Bug 47563 - Exception when working with table
- */
- public void test47563()
- {
- insertTable( 1, 5 );
- insertTable( 1, 6 );
- insertTable( 5, 1 );
- insertTable( 6, 1 );
- insertTable( 2, 2 );
- insertTable( 3, 2 );
- insertTable( 2, 3 );
- insertTable( 3, 3 );
- }
-
- /**
- * [FAILING] Bug 47731 - Word Extractor considers text copied from some
- * website as an embedded object
- */
- public void test47731() throws Exception
- {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" );
- String foundText = new WordExtractor( doc ).getText();
-
- try
- {
- assertTrue( foundText
- .contains( "Soak the rice in water for three to four hours" ) );
-
- fixed( "47731" );
- }
- catch ( AssertionFailedError exc )
- {
- // expected
- }
- }
-
- /**
- * Bug 4774 - text extracted by WordExtractor is broken
- */
- public void test47742() throws Exception {
-
- // (1) extract text from MS Word document via POI
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47742.doc");
- String foundText = new WordExtractor(doc).getText();
-
- // (2) read text from text document (retrieved by saving the word
- // document as text file using encoding UTF-8)
- InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug47742-text.txt");
- byte[] expectedBytes = IOUtils.toByteArray(is);
- String expectedText = new String(expectedBytes, "utf-8").substring(1); // strip-off the unicode marker
-
- assertEquals(expectedText, foundText);
- }
-
- /**
- * [FAILING] Bug 47958 - Exception during Escher walk of pictures
- */
- public void test47958() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
- try {
- for (Picture pic : doc.getPicturesTable().getAllPictures()) {
- System.out.println(pic.suggestFullFileName());
- }
- fixed("47958");
- } catch (Exception e) {
- // expected exception
- }
- }
-
- /**
- * [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
- * formatting)
- */
- public void test48065()
- {
- HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" );
- HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
-
- Range expected = doc1.getRange();
- Range actual = doc2.getRange();
-
- assertEquals(
- expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ),
- actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) );
-
- assertTableStructures( expected, actual );
- }
-
- private static void assertTableStructures( Range expected, Range actual )
- {
- assertEquals( expected.numParagraphs(), actual.numParagraphs() );
- for ( int p = 0; p < expected.numParagraphs(); p++ )
- {
- Paragraph expParagraph = expected.getParagraph( p );
- Paragraph actParagraph = actual.getParagraph( p );
-
- assertEquals( expParagraph.text(), actParagraph.text() );
- assertEquals( "Diffent isInTable flags for paragraphs #" + p
- + " -- " + expParagraph + " -- " + actParagraph + ".",
- expParagraph.isInTable(), actParagraph.isInTable() );
- assertEquals( expParagraph.isTableRowEnd(),
- actParagraph.isTableRowEnd() );
-
- if ( expParagraph.isInTable() && actParagraph.isInTable() )
- {
- Table expTable, actTable;
- try
- {
- expTable = expected.getTable( expParagraph );
- actTable = actual.getTable( actParagraph );
- }
- catch ( Exception exc )
- {
- continue;
- }
-
- assertEquals( expTable.numRows(), actTable.numRows() );
- assertEquals( expTable.numParagraphs(),
- actTable.numParagraphs() );
- }
- }
- }
-
- public void test49933()
- {
- HWPFOldDocument document = HWPFTestDataSamples
- .openOldSampleFile( "Bug49933.doc" );
-
- Word6Extractor word6Extractor = new Word6Extractor( document );
- String text = word6Extractor.getText();
-
- assertTrue( text.contains( "best.wine.jump.ru" ) );
- }
-
- /**
- * Bug 50936 - HWPF fails to read a file
- */
- public void test50936() {
- HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug50936.doc");
- }
-
- /**
- * [FAILING] Bug 50955 - error while retrieving the text file
- */
- public void test50955() {
- try {
- HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc");
- Word6Extractor extractor = new Word6Extractor(doc);
- String text = extractor.getText();
- fixed("50955");
- } catch (Exception e) {
- // expected exception
- }
- }
-
- /**
- * Bug 51524 - PapBinTable constructor is slow
- */
- public void test51524()
- {
- HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
- }
-
}