git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1195133 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_8_BETA5
@@ -80,7 +80,7 @@ import org.apache.poi.util.Internal; | |||
*/ | |||
public final class HWPFDocument extends HWPFDocumentCore | |||
{ | |||
private static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables"; | |||
static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables"; | |||
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable"; | |||
private static final String STREAM_DATA = "Data"; |
@@ -66,9 +66,10 @@ public class HWPFOldDocument extends HWPFDocumentCore { | |||
// We need to get hold of the text that makes up the | |||
// document, which might be regular or fast-saved | |||
ComplexFileTable cft = null; | |||
StringBuffer text = new StringBuffer(); | |||
if(_fib.getFibBase().isFComplex()) { | |||
ComplexFileTable cft = new ComplexFileTable( | |||
cft = new ComplexFileTable( | |||
_mainStream, _mainStream, | |||
complexTableOffset, _fib.getFibBase().getFcMin() | |||
); | |||
@@ -113,6 +114,27 @@ public class HWPFOldDocument extends HWPFDocumentCore { | |||
_mainStream, sedTableOffset, sedTableSize, | |||
_fib.getFibBase().getFcMin(), tpt | |||
); | |||
/* | |||
* in this mode we preserve the PAPX/CHPX structure from the file, so text may | |||
* be missing from the output, and text order may be corrupted | |||
*/ | |||
boolean preserveBinTables = false; | |||
try | |||
{ | |||
preserveBinTables = Boolean.parseBoolean( System | |||
.getProperty( HWPFDocument.PROPERTY_PRESERVE_BIN_TABLES ) ); | |||
} | |||
catch ( Exception exc ) | |||
{ | |||
// ignore; | |||
} | |||
if ( !preserveBinTables ) | |||
{ | |||
_cbt.rebuild( cft ); | |||
_pbt.rebuild( _text, cft ); | |||
} | |||
} | |||
public Range getOverallRange() |
@@ -17,8 +17,6 @@ | |||
package org.apache.poi.hwpf.model; | |||
import java.util.Collections; | |||
import org.apache.poi.poifs.common.POIFSConstants; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
@@ -57,7 +55,5 @@ public final class OldPAPBinTable extends PAPBinTable | |||
_paragraphs.add( papx ); | |||
} | |||
} | |||
Collections.sort( _paragraphs, PropertyNode.StartComparator.instance ); | |||
} | |||
} | |||
@@ -113,6 +113,12 @@ public class PAPBinTable | |||
public void rebuild( final StringBuilder docText, | |||
ComplexFileTable complexFileTable ) | |||
{ | |||
// Instance entry point: delegate to the static rebuild overload, handing
// it this table's own _paragraphs list as the list to operate on.
rebuild( docText, complexFileTable, _paragraphs ); | |||
} | |||
static void rebuild( final StringBuilder docText, | |||
ComplexFileTable complexFileTable, List<PAPX> paragraphs ) | |||
{ | |||
long start = System.currentTimeMillis(); | |||
@@ -156,19 +162,19 @@ public class PAPBinTable | |||
PAPX papx = new PAPX( textPiece.getStart(), | |||
textPiece.getEnd(), newSprmBuffer ); | |||
_paragraphs.add( papx ); | |||
paragraphs.add( papx ); | |||
} | |||
} | |||
logger.log( POILogger.DEBUG, | |||
"Merged (?) with PAPX from complex file table in ", | |||
Long.valueOf( System.currentTimeMillis() - start ), | |||
" ms (", Integer.valueOf( _paragraphs.size() ), | |||
" ms (", Integer.valueOf( paragraphs.size() ), | |||
" elements in total)" ); | |||
start = System.currentTimeMillis(); | |||
} | |||
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs ); | |||
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( paragraphs ); | |||
Collections.sort( oldPapxSortedByEndPos, | |||
PropertyNode.EndComparator.instance ); | |||
@@ -179,7 +185,7 @@ public class PAPBinTable | |||
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>(); | |||
{ | |||
int counter = 0; | |||
for ( PAPX papx : _paragraphs ) | |||
for ( PAPX papx : paragraphs ) | |||
{ | |||
papxToFileOrder.put( papx, Integer.valueOf( counter++ ) ); | |||
} | |||
@@ -270,6 +276,9 @@ public class PAPBinTable | |||
SprmBuffer sprmBuffer = null; | |||
for ( PAPX papx : papxs ) | |||
{ | |||
if ( papx.getGrpprl() == null || papx.getGrpprl().length == 0 ) | |||
continue; | |||
if ( sprmBuffer == null ) | |||
try | |||
{ | |||
@@ -281,7 +290,9 @@ public class PAPBinTable | |||
throw new Error( e ); | |||
} | |||
else | |||
{ | |||
sprmBuffer.append( papx.getGrpprl(), 2 ); | |||
} | |||
} | |||
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer ); | |||
newPapxs.add( newPapx ); | |||
@@ -289,11 +300,12 @@ public class PAPBinTable | |||
lastParStart = endExclusive; | |||
continue; | |||
} | |||
this._paragraphs = new ArrayList<PAPX>( newPapxs ); | |||
paragraphs.clear(); | |||
paragraphs.addAll( newPapxs ); | |||
logger.log( POILogger.DEBUG, "PAPX rebuilded from document text in ", | |||
Long.valueOf( System.currentTimeMillis() - start ), " ms (", | |||
Integer.valueOf( _paragraphs.size() ), " elements)" ); | |||
Integer.valueOf( paragraphs.size() ), " elements)" ); | |||
start = System.currentTimeMillis(); | |||
} | |||
@@ -112,11 +112,17 @@ public final class PAPX extends BytePropertyNode<PAPX> { | |||
public byte[] getGrpprl() | |||
{ | |||
// No backing buffer: hand back an empty array instead of dereferencing
// null, so callers can test the length without a null check.
if (_buf == null) | |||
return new byte[0]; | |||
// Otherwise _buf is treated as a SprmBuffer and its byte array is returned.
return ((SprmBuffer)_buf).toByteArray(); | |||
} | |||
public short getIstd() | |||
{ | |||
public short getIstd() | |||
{ | |||
if ( _buf == null ) | |||
return 0; | |||
byte[] buf = getGrpprl(); | |||
if (buf.length == 0) | |||
{ |
@@ -1101,7 +1101,7 @@ public class Range { // TODO -instantiable superclass | |||
int endIndex = binarySearchEnd( rpl, startIndex, end ); | |||
while ( endIndex < rpl.size() - 1 | |||
&& rpl.get( endIndex + 1 ).getEnd() <= end ) | |||
endIndex--; | |||
endIndex++; | |||
if ( startIndex < 0 || startIndex >= rpl.size() | |||
|| startIndex > endIndex || endIndex < 0 |
@@ -28,6 +28,8 @@ import java.util.List; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.hwpf.converter.WordToTextConverter; | |||
import org.apache.commons.codec.digest.DigestUtils; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.hwpf.HWPFDocument; | |||
@@ -736,7 +738,8 @@ public class TestBugs extends TestCase | |||
*/ | |||
public void testBug51944() throws Exception | |||
{ | |||
// NOTE(review): the same sample is opened again on the next statement and
// this first result is discarded; confirm whether this call is still wanted.
HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" ); | |||
// Open the sample as an HWPFOldDocument and run text conversion over it.
// There are no assertions: the test fails only if one of these calls throws.
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile( "Bug51944.doc" ); | |||
WordToTextConverter.getText( doc ); | |||
} | |||
/** |