// Now load the rest of the properties, which need to be adjusted
// for where the text really begins
- _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
- _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
+ _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
+ _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
+import org.apache.poi.hwpf.sprm.SprmIterator;
+import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
* Constructor used to read a binTable in from a Word document.
*
* @deprecated Use
- * {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)}
+ * {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
* instead
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin, TextPieceTable tpt ) // fcMin is accepted but not forwarded to the delegate constructor
{
- this( documentStream, tableStream, offset, size, tpt, true );
+ this( documentStream, tableStream, offset, size, null, tpt, true ); // null ComplexFileTable: the "complexFileTable != null" pass that adds CHPX from fast-saved SPRMs is skipped
}
/**
* Constructor used to read a binTable in from a Word document.
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
- int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
+ int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
+ boolean ignoreChpxWithoutTextPieces )
{
/*
* Page 35:
}
}
+ if ( complexFileTable != null )
+ {
+ SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
+
+ // adding CHPX from fast-saved SPRMs
+ for ( TextPiece textPiece : tpt.getTextPieces() )
+ {
+ PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
+ if ( !prm.isComplex() )
+ continue;
+ int igrpprl = prm.getIgrpprl();
+
+ if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
+ {
+ logger.log( POILogger.WARN, textPiece
+ + "'s PRM references to unknown grpprl" );
+ continue;
+ }
+
+ boolean hasChp = false;
+ SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
+ for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
+ .hasNext(); )
+ {
+ SprmOperation sprmOperation = iterator.next();
+ if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
+ {
+ hasChp = true;
+ break;
+ }
+ }
+
+ if ( hasChp )
+ {
+ SprmBuffer newSprmBuffer;
+ try
+ {
+ newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
+ }
+ catch ( CloneNotSupportedException e )
+ {
+ // shall not happen
+ throw new Error( e );
+ }
+
+ CHPX chpx = new CHPX( textPiece.getStart(),
+ textPiece.getEnd(), newSprmBuffer );
+ _textRuns.add( chpx );
+ }
+ }
+ }
+
// rebuild document paragraphs structure
StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() )
package org.apache.poi.hwpf.model;
import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import org.apache.poi.hwpf.model.io.HWPFFileSystem;
+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
+import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian;
-import org.apache.poi.hwpf.model.io.*;
public final class ComplexFileTable
{
protected TextPieceTable _tpt;
+ private SprmBuffer[] _grpprls;
+
public ComplexFileTable()
{
_tpt = new TextPieceTable();
{
//skips through the prms before we reach the piece table. These contain data
//for actual fast saved files
- while (tableStream[offset] == GRPPRL_TYPE)
- {
- offset++;
- int skip = LittleEndian.getShort(tableStream, offset);
- offset += LittleEndian.SHORT_SIZE + skip;
- }
+ List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
+ while ( tableStream[offset] == GRPPRL_TYPE )
+ {
+ offset++;
+ int size = LittleEndian.getShort( tableStream, offset );
+ offset += LittleEndian.SHORT_SIZE;
+ byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
+ offset += size;
+
+ SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
+ sprmBuffers.add( sprmBuffer );
+ }
+ this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
+
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
{
throw new IOException("The text piece table is corrupted");
return _tpt;
}
+ public SprmBuffer[] getGrpprls() // grpprls read from the table stream ahead of the text piece table, in stream order
+ {
+ return _grpprls; // the internal array itself, not a copy; NOTE(review): looks unset (null) when the no-arg constructor is used - confirm
+ }
+
public void writeTo(HWPFFileSystem sys)
throws IOException
{
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
+import org.apache.poi.hwpf.sprm.SprmIterator;
+import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt )
{
- this( documentStream, tableStream, dataStream, offset, size, tpt, true );
+ this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
- byte[] dataStream, int offset, int size, TextPieceTable tpt,
- boolean ignorePapxWithoutTextPieces )
+ byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
+ TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
}
}
+ if ( complexFileTable != null )
+ {
+ SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
+
+ // adding PAPX from fast-saved SPRMs
+ for ( TextPiece textPiece : tpt.getTextPieces() )
+ {
+ PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
+ if ( !prm.isComplex() )
+ continue;
+ int igrpprl = prm.getIgrpprl();
+
+ if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
+ {
+ logger.log( POILogger.WARN, textPiece
+ + "'s PRM references to unknown grpprl" );
+ continue;
+ }
+
+ boolean hasPap = false;
+ SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
+ for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
+ .hasNext(); )
+ {
+ SprmOperation sprmOperation = iterator.next();
+ if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
+ {
+ hasPap = true;
+ break;
+ }
+ }
+
+ if ( hasPap )
+ {
+ SprmBuffer newSprmBuffer = new SprmBuffer(2);
+ newSprmBuffer.append( sprmBuffer.toByteArray() );
+
+ PAPX papx = new PAPX( textPiece.getStart(),
+ textPiece.getEnd(), newSprmBuffer, dataStream );
+ _paragraphs.add( papx );
+ }
+ }
+ }
+
// rebuild document paragraphs structure
StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() )
if ( fChar == 13 || fChar == 7 || fChar == 12 )
break;
}
- // if ( papx.getStart() <= charIndex && charIndex <
- // papx.getEnd() )
- // {
- // papxs.add( papx );
- // }
}
if ( papxs.size() == 0 )
}
}
- SprmBuffer sprmBuffer = new SprmBuffer( 2 );
+ SprmBuffer sprmBuffer = null;
for ( PAPX papx : papxs )
{
- sprmBuffer.append( papx.getGrpprl(), 2 );
+ if ( sprmBuffer == null )
+ try
+ {
+ sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
+ }
+ catch ( CloneNotSupportedException e )
+ {
+ // can't happen
+ throw new Error( e );
+ }
+ else
+ sprmBuffer.append( papx.getGrpprl(), 2 );
}
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
dataStream );
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
+import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
Integer.valueOf( startAt ), "; ",
Integer.valueOf( endAt ),
") (bytes) doesn't have corresponding text pieces "
- + "and will be skipped" );
+ + "and will be skipped\n\tSkipped SPRM: "
+ + new SprmBuffer( getGrpprl( x ), 2 ) );
_papxList.add( null );
continue;
}
return _papxList.get(index);
}
+ public List<PAPX> getPAPXs() // read-only view over _papxList, not a copy
+ {
+ return Collections.unmodifiableList( _papxList ); // entries may be null: PAPXs without corresponding text pieces are stored as null placeholders
+ }
+
/**
* Gets the papx grpprl for the paragraph at index in this fkp.
*
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
- _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false);
+ _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false);
+ CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _cHPBinTable._textRuns;
ArrayList newTextRuns = newBinTable._textRuns;
byte[] mainStream = _hWPFDocFixture._mainStream;
byte[] tableStream = _hWPFDocFixture._tableStream;
- _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
+ _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
- PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
+ PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs();