From: Sergey Vladimirov Date: Tue, 12 Jul 2011 00:40:27 +0000 (+0000) Subject: add initial support for fast-saved files X-Git-Tag: REL_3_8_BETA4~197 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=fd8a518446b0d3908a6712bee287645927ed7097;p=poi.git add initial support for fast-saved files git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index b8a9892f9d..e317b24fc8 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore // Now load the rest of the properties, which need to be adjusted // for where text really begin - _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true); - _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true); + _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true); + _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true); // Read FSPA and Escher information _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index 75abad1f0e..ae1b9736ba 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -29,6 +29,8 @@ import java.util.Set; import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.sprm.SprmBuffer; +import org.apache.poi.hwpf.sprm.SprmIterator; +import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; @@ -58,20 +60,21 @@ public class CHPBinTable * Constructor used to read a binTable in from a Word document. * * @deprecated Use - * {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)} + * {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)} * instead */ public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin, TextPieceTable tpt ) { - this( documentStream, tableStream, offset, size, tpt, true ); + this( documentStream, tableStream, offset, size, null, tpt, true ); } /** * Constructor used to read a binTable in from a Word document. */ public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, - int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces ) + int size, ComplexFileTable complexFileTable, TextPieceTable tpt, + boolean ignoreChpxWithoutTextPieces ) { /* * Page 35: @@ -105,6 +108,58 @@ public class CHPBinTable } } + if ( complexFileTable != null ) + { + SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls(); + + // adding CHPX from fast-saved SPRMs + for ( TextPiece textPiece : tpt.getTextPieces() ) + { + PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); + if ( !prm.isComplex() ) + continue; + int igrpprl = prm.getIgrpprl(); + + if ( igrpprl < 0 || igrpprl >= sprmBuffers.length ) + { + logger.log( POILogger.WARN, textPiece + + "'s PRM references to unknown grpprl" ); + continue; + } + + boolean hasChp = false; + SprmBuffer sprmBuffer = sprmBuffers[igrpprl]; + for ( SprmIterator iterator = sprmBuffer.iterator(); iterator + .hasNext(); ) + { + SprmOperation sprmOperation = iterator.next(); + if ( sprmOperation.getType() == SprmOperation.TYPE_CHP ) + { + hasChp = true; + break; + } + } + + if ( hasChp ) + { + SprmBuffer newSprmBuffer; + try + { + newSprmBuffer = (SprmBuffer) sprmBuffer.clone(); + } + catch ( CloneNotSupportedException e ) + { + // shall not happen + throw new Error( e ); + } + + CHPX chpx = new CHPX( textPiece.getStart(), + textPiece.getEnd(), newSprmBuffer ); + _textRuns.add( chpx ); + } + } + } + // rebuild document paragraphs structure StringBuilder docText = new StringBuilder(); for ( TextPiece textPiece : tpt.getTextPieces() ) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java index ffc62d0fa7..d679c03e95 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java @@ -18,9 +18,13 @@ package org.apache.poi.hwpf.model; import java.io.IOException; +import java.util.LinkedList; +import java.util.List; +import org.apache.poi.hwpf.model.io.HWPFFileSystem; +import org.apache.poi.hwpf.model.io.HWPFOutputStream; +import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.util.LittleEndian; -import org.apache.poi.hwpf.model.io.*; public final class ComplexFileTable { @@ -30,6 +34,8 @@ public final class ComplexFileTable protected TextPieceTable _tpt; + private SprmBuffer[] _grpprls; + public ComplexFileTable() { _tpt = new TextPieceTable(); @@ -39,12 +45,20 @@ public final class ComplexFileTable { //skips through the prms before we reach the piece table. These contain data //for actual fast saved files - while (tableStream[offset] == GRPPRL_TYPE) - { - offset++; - int skip = LittleEndian.getShort(tableStream, offset); - offset += LittleEndian.SHORT_SIZE + skip; - } + List sprmBuffers = new LinkedList(); + while ( tableStream[offset] == GRPPRL_TYPE ) + { + offset++; + int size = LittleEndian.getShort( tableStream, offset ); + offset += LittleEndian.SHORT_SIZE; + byte[] bs = LittleEndian.getByteArray( tableStream, offset, size ); + offset += size; + + SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 ); + sprmBuffers.add( sprmBuffer ); + } + this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] ); + if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE) { throw new IOException("The text piece table is corrupted"); @@ -59,6 +73,11 @@ public final class ComplexFileTable return _tpt; } + public SprmBuffer[] getGrpprls() + { + return _grpprls; + } + public void writeTo(HWPFFileSystem sys) throws IOException { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 645ea14f6b..7627b2e0b4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -26,6 +26,8 @@ import java.util.List; import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.sprm.SprmBuffer; +import org.apache.poi.hwpf.sprm.SprmIterator; +import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; @@ -62,12 +64,12 @@ public class PAPBinTable byte[] dataStream, int offset, int size, int fcMin, TextPieceTable tpt ) { - this( documentStream, tableStream, dataStream, offset, size, tpt, true ); + this( documentStream, tableStream, dataStream, offset, size, null, tpt, true ); } public PAPBinTable( byte[] documentStream, byte[] tableStream, - byte[] dataStream, int offset, int size, TextPieceTable tpt, - boolean ignorePapxWithoutTextPieces ) + byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable, + TextPieceTable tpt, boolean ignorePapxWithoutTextPieces ) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); this.tpt = tpt; @@ -94,6 +96,50 @@ public class PAPBinTable } } + if ( complexFileTable != null ) + { + SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls(); + + // adding CHPX from fast-saved SPRMs + for ( TextPiece textPiece : tpt.getTextPieces() ) + { + PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); + if ( !prm.isComplex() ) + continue; + int igrpprl = prm.getIgrpprl(); + + if ( igrpprl < 0 || igrpprl >= sprmBuffers.length ) + { + logger.log( POILogger.WARN, textPiece + + "'s PRM references to unknown grpprl" ); + continue; + } + + boolean hasPap = false; + SprmBuffer sprmBuffer = sprmBuffers[igrpprl]; + for ( SprmIterator iterator = sprmBuffer.iterator(); iterator + .hasNext(); ) + { + SprmOperation sprmOperation = iterator.next(); + if ( sprmOperation.getType() == SprmOperation.TYPE_PAP ) + { + hasPap = true; + break; + } + } + + if ( hasPap ) + { + SprmBuffer newSprmBuffer = new SprmBuffer(2); + newSprmBuffer.append( sprmBuffer.toByteArray() ); + + PAPX papx = new PAPX( textPiece.getStart(), + textPiece.getEnd(), newSprmBuffer, dataStream ); + _paragraphs.add( papx ); + } + } + } + // rebuild document paragraphs structure StringBuilder docText = new StringBuilder(); for ( TextPiece textPiece : tpt.getTextPieces() ) @@ -144,11 +190,6 @@ public class PAPBinTable if ( fChar == 13 || fChar == 7 || fChar == 12 ) break; } - // if ( papx.getStart() <= charIndex && charIndex < - // papx.getEnd() ) - // { - // papxs.add( papx ); - // } } if ( papxs.size() == 0 ) @@ -178,10 +219,21 @@ public class PAPBinTable } } - SprmBuffer sprmBuffer = new SprmBuffer( 2 ); + SprmBuffer sprmBuffer = null; for ( PAPX papx : papxs ) { - sprmBuffer.append( papx.getGrpprl(), 2 ); + if ( sprmBuffer == null ) + try + { + sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone(); + } + catch ( CloneNotSupportedException e ) + { + // can't happen + throw new Error( e ); + } + else + sprmBuffer.append( papx.getGrpprl(), 2 ); } PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer, dataStream ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 4e357b38f4..05fff84f5c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { Integer.valueOf( startAt ), "; ", Integer.valueOf( endAt ), ") (bytes) doesn't have corresponding text pieces " - + "and will be skipped" ); + + "and will be skipped\n\tSkipped SPRM: " + + new SprmBuffer( getGrpprl( x ), 2 ) ); _papxList.add( null ); continue; } @@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { return _papxList.get(index); } + public List getPAPXs() + { + return Collections.unmodifiableList( _papxList ); + } + /** * Gets the papx grpprl for the paragraph at index in this fkp. * diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java index e2aeeb46cb..38700f7907 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java @@ -46,7 +46,7 @@ public final class TestCHPBinTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); - _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false); + _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -57,7 +57,7 @@ public final class TestCHPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false); + CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false); ArrayList oldTextRuns = _cHPBinTable._textRuns; ArrayList newTextRuns = newBinTable._textRuns; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java index 8bd102a16c..e34d563acb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java @@ -40,7 +40,7 @@ public final class TestPAPBinTable byte[] mainStream = _hWPFDocFixture._mainStream; byte[] tableStream = _hWPFDocFixture._tableStream; - _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false); + _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -51,7 +51,7 @@ public final class TestPAPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false); + PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false); ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs();