// Now load the rest of the properties, which need to be adjusted
// for where the text really begins
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), cpMin, _tpt);
- _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt);
+ _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt, true);
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
{
}
- public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset,
- int size, int fcMin, TextPieceTable tpt)
- {
+ /**
+ * Legacy constructor kept for source compatibility. It delegates to the
+ * eight-argument constructor and passes {@code true} for
+ * {@code ignorePapxWithoutTextPieces}.
+ *
+ * @deprecated Use
+ * {@link #PAPBinTable(byte[],byte[],byte[],int,int,int,TextPieceTable,boolean)}
+ * instead
+ */
+ @Deprecated
+ public PAPBinTable( byte[] documentStream, byte[] tableStream,
+ byte[] dataStream, int offset, int size, int fcMin,
+ TextPieceTable tpt )
+ {
+ // true: PAPX are only kept when the text piece table reports their byte range
+ this( documentStream, tableStream, dataStream, offset, size, fcMin,
+ tpt, true );
+ }
+
+ public PAPBinTable( byte[] documentStream, byte[] tableStream,
+ byte[] dataStream, int offset, int size, int fcMin,
+ TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
+ {
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
PAPX papx = pfkp.getPAPX(y);
// PAPX that are referenced nowhere are not needed; they are skipped only when ignorePapxWithoutTextPieces is set
- if (tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
+ if (!ignorePapxWithoutTextPieces || tpt.isIndexInTable( papx.getStartBytes(), papx.getEndBytes() ))
_paragraphs.add(papx);
}
}
while (overflow != null);
tableStream.write(binTable.toByteArray());
}
-
-
}
-
package org.apache.poi.hwpf.model;
-import junit.framework.*;
-import org.apache.poi.hwpf.*;
-import org.apache.poi.hwpf.model.io.*;
+import java.io.ByteArrayOutputStream;
+import java.util.ArrayList;
-import java.io.*;
-import java.util.*;
+import junit.framework.TestCase;
+import org.apache.poi.hwpf.HWPFDocFixture;
+import org.apache.poi.hwpf.model.io.HWPFFileSystem;
public final class TestPAPBinTable
extends TestCase
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
- _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT);
+ _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT);
+ PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs();
package org.apache.poi.hwpf.usermodel;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.util.List;
assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
}
+ /**
+ * Bug 47286 - Word documents saves in wrong format if source contains form elements.
+ * <p>
+ * Extracts the text of the sample document, writes the document out and
+ * reads it back, then checks that the extracted text is the same once
+ * line separators are ignored.
+ * <p>
+ * NOTE(review): the method name and the sample file use 46286 while the
+ * title above says 47286 - confirm which bug number is intended.
+ *
+ * @throws IOException if reading or writing the document fails
+ * (presumably raised by the sample-data helpers - verify)
+ */
+ public void test46286() throws IOException {
+ HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug46286.doc");
+ String text1 = new WordExtractor(doc1).getText().trim();
+
+ // The round trip below is the only write the test needs; no scratch
+ // file is left behind in the working directory.
+ HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
+ String text2 = new WordExtractor(doc2).getText().trim();
+
+ // the text in the saved document has some differences in line separators but we tolerate that
+ assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
+ }
+
/**
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
* CharacterRun.replaceText()