// Now load the rest of the properties, which need to be adjusted
// for where text really begin
- _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
- _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
+ _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt);
+ _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt);
+
+ _cbt.rebuild( _cft );
+ _pbt.rebuild( _dataStream, _cft );
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
mainStream, doc.getDataStream(), pageOffset,
- doc.getTextTable(), false );
+ doc.getTextTable() );
System.out.println( "* PFKP: " + pfkp );
* Constructor used to read a binTable in from a Word document.
*
* @deprecated Use
- * {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
+ * {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable)}
* instead
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin, TextPieceTable tpt )
{
- this( documentStream, tableStream, offset, size, null, tpt, true );
+ this( documentStream, tableStream, offset, size, tpt );
}
/**
* Constructor used to read a binTable in from a Word document.
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
- int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
- boolean reconstructChpxTable )
+ int size, TextPieceTable tpt )
{
long start = System.currentTimeMillis();
/*
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
- pageOffset, tpt, reconstructChpxTable);
+ pageOffset, tpt);
int fkpSize = cfkp.size();
logger.log( POILogger.DEBUG, "CHPX FKPs loaded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _textRuns.size() ), " elements)" );
- start = System.currentTimeMillis();
-
- if ( !reconstructChpxTable )
- {
- Collections.sort( _textRuns );
+ }
- logger.log( POILogger.DEBUG, "CHPX sorted in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms" );
- return;
- }
+ public void rebuild( ComplexFileTable complexFileTable )
+ {
+ long start = System.currentTimeMillis();
if ( complexFileTable != null )
{
iterator.remove();
continue;
}
-
+
previous = current;
}
logger.log( POILogger.DEBUG, "CHPX compacted in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _textRuns.size() ), " elements)" );
-}
+ }
private static int binarySearch( List<CHPX> chpxs, int startPosition )
{
import java.util.ArrayList;
import java.util.List;
+import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
* read from a Word file).
*
* @deprecated Use
- * {@link #CHPFormattedDiskPage(byte[],int,TextPieceTable,boolean)}
+ * {@link #CHPFormattedDiskPage(byte[],int,TextPieceTable)}
* instead
*/
@SuppressWarnings( "unused" )
public CHPFormattedDiskPage( byte[] documentStream, int offset, int fcMin,
TextPieceTable tpt )
{
- this( documentStream, offset, tpt, true );
+ this( documentStream, offset, tpt );
}
/**
* read from a Word file).
*/
public CHPFormattedDiskPage( byte[] documentStream, int offset,
- TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
+ TextPieceTable tpt )
{
super( documentStream, offset );
for ( int x = 0; x < _crun; x++ )
{
- int startAt = getStart( x );
- int endAt = getEnd( x );
-
- if ( ignoreChpxWithoutTextPieces
- && !tpt.isIndexInTable( startAt, endAt ) )
- {
- logger.log( POILogger.WARN, "CHPX [",
- Integer.valueOf( startAt ), "; ",
- Integer.valueOf( endAt ),
- ") (bytes) doesn't have corresponding text pieces "
- + "and will be skipped" );
-
- _chpxList.add( null );
- continue;
- }
-
- CHPX chpx = new CHPX( startAt, endAt, tpt, getGrpprl( x ) );
-
- if ( ignoreChpxWithoutTextPieces
- && chpx.getStart() == chpx.getEnd() )
- {
- logger.log( POILogger.WARN, chpx
- + " references zero-length range and will be skipped" );
- _chpxList.add( null );
- continue;
- }
+ int bytesStartAt = getStart( x );
+ int bytesEndAt = getEnd( x );
+
+ int charStartAt = tpt.getCharIndex( bytesStartAt );
+ int charEndAt = tpt.getCharIndex( bytesEndAt, charStartAt );
+ // TODO: verify that converting the byte offsets to character offsets
+ // above matches what the previous call did:
+ // new CHPX( bytesStartAt, bytesEndAt, tpt, getGrpprl( x ) )
+ CHPX chpx = new CHPX( charStartAt, charEndAt, new SprmBuffer(
+ getGrpprl( x ), 0 ) );
_chpxList.add( chpx );
}
}
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
- pageOffset, tpt, true);
+ pageOffset, tpt);
int fkpSize = cfkp.size();
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
- documentStream, pageOffset, tpt, true);
+ documentStream, pageOffset, tpt);
int fkpSize = pfkp.size();
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
*
* @author Ryan Ackley
*/
+@Internal
public class PAPBinTable
{
private static final POILogger logger = POILogFactory
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt )
{
- this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
+ this( documentStream, tableStream, dataStream, offset, size, tpt );
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
- byte[] dataStream, int offset, int size,
- ComplexFileTable complexFileTable, TextPieceTable tpt,
- boolean reconstructPapxTable )
+ byte[] dataStream, int offset, int size, TextPieceTable tpt )
{
long start = System.currentTimeMillis();
* pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
- documentStream, dataStream, pageOffset, tpt,
- reconstructPapxTable );
+ documentStream, dataStream, pageOffset, tpt );
int fkpSize = pfkp.size();
logger.log( POILogger.DEBUG, "PAPX tables loaded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _paragraphs.size() ), " elements)" );
- start = System.currentTimeMillis();
-
- if ( !reconstructPapxTable )
- {
- Collections.sort( _paragraphs );
+ }
- logger.log( POILogger.DEBUG, "PAPX sorted in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms" );
- return;
- }
+ public void rebuild( byte[] dataStream, ComplexFileTable complexFileTable )
+ {
+ long start = System.currentTimeMillis();
if ( complexFileTable != null )
{
import java.util.Collections;
import java.util.List;
-import org.apache.poi.hwpf.sprm.SprmBuffer;
+import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
-import org.apache.poi.util.POILogFactory;
-import org.apache.poi.util.POILogger;
/**
* Represents a PAP FKP. The style properties for paragraph and character runs
*
* @author Ryan Ackley
*/
+@Internal
public final class PAPFormattedDiskPage extends FormattedDiskPage {
- private static final POILogger logger = POILogFactory
- .getLogger( PAPFormattedDiskPage.class );
-
private static final int BX_SIZE = 13;
private static final int FC_SIZE = 4;
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
int offset, int fcMin, TextPieceTable tpt )
{
- this( documentStream, dataStream, offset, tpt, true );
+ this( documentStream, dataStream, offset, tpt );
}
/**
* Creates a PAPFormattedDiskPage from a 512 byte array
*/
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
- int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
+ int offset, TextPieceTable tpt )
{
super( documentStream, offset );
for ( int x = 0; x < _crun; x++ )
int startAt = getStart( x );
int endAt = getEnd( x );
- if ( ignorePapxWithoutTextPieces
- && !tpt.isIndexInTable( startAt, endAt ) )
- {
- logger.log( POILogger.WARN, "PAPX [",
- Integer.valueOf( startAt ), "; ",
- Integer.valueOf( endAt ),
- ") (bytes) doesn't have corresponding text pieces "
- + "and will be skipped\n\tSkipped SPRM: "
- + new SprmBuffer( getGrpprl( x ), 2 ) );
- _papxList.add( null );
- continue;
- }
-
PAPX papx = new PAPX( startAt, endAt, tpt, getGrpprl( x ),
getParagraphHeight( x ), dataStream );
-
- if ( ignorePapxWithoutTextPieces
- && papx.getStart() == papx.getEnd() )
- {
- logger.log( POILogger.WARN, papx
- + " references zero-length range and will be skipped" );
- _papxList.add( null );
- continue;
- }
-
_papxList.add( papx );
}
_fkp = null;
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
- _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
+ _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT);
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
+ CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT);
ArrayList oldTextRuns = _cHPBinTable._textRuns;
ArrayList newTextRuns = newBinTable._textRuns;
byte[] tableStream = _hWPFDocFixture._tableStream;
PAPBinTable _pAPBinTable = new PAPBinTable( mainStream, tableStream,
- null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null,
- fakeTPT, false );
+ null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT );
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable( newMainStream,
- newTableStream, null, 0, newTableStream.length, null, fakeTPT,
- false );
+ newTableStream, null, 0, newTableStream.length, fakeTPT );
List<PAPX> oldTextRuns = _pAPBinTable.getParagraphs();
List<PAPX> newTextRuns = newBinTable.getParagraphs();