123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hwpf.model;
-
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.IdentityHashMap;
- import java.util.LinkedList;
- import java.util.List;
- import java.util.Map;
-
- import org.apache.poi.hwpf.model.io.HWPFFileSystem;
- import org.apache.poi.hwpf.model.io.HWPFOutputStream;
- import org.apache.poi.hwpf.sprm.SprmBuffer;
- import org.apache.poi.hwpf.sprm.SprmIterator;
- import org.apache.poi.hwpf.sprm.SprmOperation;
- import org.apache.poi.poifs.common.POIFSConstants;
- import org.apache.poi.util.Internal;
- import org.apache.poi.util.LittleEndian;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
- /**
- * This class represents the bin table of Word document but it also serves as a
- * holder for all of the paragraphs of document that have been loaded into
- * memory.
- *
- * @author Ryan Ackley
- */
- @Internal
- public class PAPBinTable
- {
- private static final POILogger logger = POILogFactory
- .getLogger( PAPBinTable.class );
-
- protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>();
-
- public PAPBinTable()
- {
- }
-
- /**
- * @deprecated Use
- * {@link #PAPBinTable(byte[], byte[], byte[], int, int, CharIndexTranslator)}
- * instead
- */
- public PAPBinTable( byte[] documentStream, byte[] tableStream,
- byte[] dataStream, int offset, int size, int fcMin,
- TextPieceTable tpt )
- {
- this( documentStream, tableStream, dataStream, offset, size, tpt );
- }
-
- public PAPBinTable( byte[] documentStream, byte[] tableStream,
- byte[] dataStream, int offset, int size,
- CharIndexTranslator charIndexTranslator )
- {
- long start = System.currentTimeMillis();
-
- {
- PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 );
-
- int length = binTable.length();
- for ( int x = 0; x < length; x++ )
- {
- GenericPropertyNode node = binTable.getProperty( x );
-
- int pageNum = LittleEndian.getInt( node.getBytes() );
- int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
- * pageNum;
-
- PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
- documentStream, dataStream, pageOffset,
- charIndexTranslator );
-
- for ( PAPX papx : pfkp.getPAPXs() )
- {
- if ( papx != null )
- _paragraphs.add( papx );
- }
- }
- }
-
- logger.log( POILogger.DEBUG, "PAPX tables loaded in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms (",
- Integer.valueOf( _paragraphs.size() ), " elements)" );
-
- if ( _paragraphs.isEmpty() )
- {
- logger.log( POILogger.WARN, "PAPX FKPs are empty" );
- _paragraphs.add( new PAPX( 0, 0, new SprmBuffer( 2 ) ) );
- }
- }
-
- public void rebuild( final StringBuilder docText,
- ComplexFileTable complexFileTable )
- {
- rebuild( docText, complexFileTable, _paragraphs );
- }
-
- static void rebuild( final StringBuilder docText,
- ComplexFileTable complexFileTable, List<PAPX> paragraphs )
- {
- long start = System.currentTimeMillis();
-
- if ( complexFileTable != null )
- {
- SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
-
- // adding PAPX from fast-saved SPRMs
- for ( TextPiece textPiece : complexFileTable.getTextPieceTable()
- .getTextPieces() )
- {
- PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
- if ( !prm.isComplex() )
- continue;
- int igrpprl = prm.getIgrpprl();
-
- if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
- {
- logger.log( POILogger.WARN, textPiece
- + "'s PRM references to unknown grpprl" );
- continue;
- }
-
- boolean hasPap = false;
- SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
- for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
- .hasNext(); )
- {
- SprmOperation sprmOperation = iterator.next();
- if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
- {
- hasPap = true;
- break;
- }
- }
-
- if ( hasPap )
- {
- SprmBuffer newSprmBuffer = new SprmBuffer( 2 );
- newSprmBuffer.append( sprmBuffer.toByteArray() );
-
- PAPX papx = new PAPX( textPiece.getStart(),
- textPiece.getEnd(), newSprmBuffer );
- paragraphs.add( papx );
- }
- }
-
- logger.log( POILogger.DEBUG,
- "Merged (?) with PAPX from complex file table in ",
- Long.valueOf( System.currentTimeMillis() - start ),
- " ms (", Integer.valueOf( paragraphs.size() ),
- " elements in total)" );
- start = System.currentTimeMillis();
- }
-
- List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( paragraphs );
- Collections.sort( oldPapxSortedByEndPos,
- PropertyNode.EndComparator.instance );
-
- logger.log( POILogger.DEBUG, "PAPX sorted by end position in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms" );
- start = System.currentTimeMillis();
-
- final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
- {
- int counter = 0;
- for ( PAPX papx : paragraphs )
- {
- papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
- }
- }
- final Comparator<PAPX> papxFileOrderComparator = new Comparator<PAPX>()
- {
- public int compare( PAPX o1, PAPX o2 )
- {
- Integer i1 = papxToFileOrder.get( o1 );
- Integer i2 = papxToFileOrder.get( o2 );
- return i1.compareTo( i2 );
- }
- };
-
- logger.log( POILogger.DEBUG, "PAPX's order map created in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms" );
- start = System.currentTimeMillis();
-
- List<PAPX> newPapxs = new LinkedList<PAPX>();
- int lastParStart = 0;
- int lastPapxIndex = 0;
- for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
- {
- final char c = docText.charAt( charIndex );
- if ( c != 13 && c != 7 && c != 12 )
- continue;
-
- final int startInclusive = lastParStart;
- final int endExclusive = charIndex + 1;
-
- boolean broken = false;
- List<PAPX> papxs = new LinkedList<PAPX>();
- for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
- .size(); papxIndex++ )
- {
- broken = false;
- PAPX papx = oldPapxSortedByEndPos.get( papxIndex );
-
- assert startInclusive == 0
- || papxIndex + 1 == oldPapxSortedByEndPos.size()
- || papx.getEnd() > startInclusive;
-
- if ( papx.getEnd() - 1 > charIndex )
- {
- lastPapxIndex = papxIndex;
- broken = true;
- break;
- }
-
- papxs.add( papx );
- }
- if ( !broken )
- {
- lastPapxIndex = oldPapxSortedByEndPos.size() - 1;
- }
-
- if ( papxs.size() == 0 )
- {
- logger.log( POILogger.WARN, "Paragraph [",
- Integer.valueOf( startInclusive ), "; ",
- Integer.valueOf( endExclusive ),
- ") has no PAPX. Creating new one." );
- // create it manually
- PAPX papx = new PAPX( startInclusive, endExclusive,
- new SprmBuffer( 2 ) );
- newPapxs.add( papx );
-
- lastParStart = endExclusive;
- continue;
- }
-
- if ( papxs.size() == 1 )
- {
- // can we reuse existing?
- PAPX existing = papxs.get( 0 );
- if ( existing.getStart() == startInclusive
- && existing.getEnd() == endExclusive )
- {
- newPapxs.add( existing );
- lastParStart = endExclusive;
- continue;
- }
- }
-
- // restore file order of PAPX
- Collections.sort( papxs, papxFileOrderComparator );
-
- SprmBuffer sprmBuffer = null;
- for ( PAPX papx : papxs )
- {
- if ( papx.getGrpprl() == null || papx.getGrpprl().length == 0 )
- continue;
-
- if ( sprmBuffer == null ) {
- sprmBuffer = papx.getSprmBuf().clone();
- } else {
- sprmBuffer.append( papx.getGrpprl(), 2 );
- }
- }
- PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
- newPapxs.add( newPapx );
-
- lastParStart = endExclusive;
- continue;
- }
- paragraphs.clear();
- paragraphs.addAll( newPapxs );
-
- logger.log( POILogger.DEBUG, "PAPX rebuilded from document text in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms (",
- Integer.valueOf( paragraphs.size() ), " elements)" );
- start = System.currentTimeMillis();
- }
-
- public void insert(int listIndex, int cpStart, SprmBuffer buf)
- {
-
- PAPX forInsert = new PAPX(0, 0, buf);
-
- // Ensure character offsets are really characters
- forInsert.setStart(cpStart);
- forInsert.setEnd(cpStart);
-
- if (listIndex == _paragraphs.size())
- {
- _paragraphs.add(forInsert);
- }
- else
- {
- PAPX currentPap = _paragraphs.get(listIndex);
- if (currentPap != null && currentPap.getStart() < cpStart)
- {
- SprmBuffer clonedBuf = currentPap.getSprmBuf().clone();
-
- // Copy the properties of the one before to afterwards
- // Will go:
- // Original, until insert at point
- // New one
- // Clone of original, on to the old end
- PAPX clone = new PAPX(0, 0, clonedBuf);
- // Again ensure contains character based offsets no matter what
- clone.setStart(cpStart);
- clone.setEnd(currentPap.getEnd());
-
- currentPap.setEnd(cpStart);
-
- _paragraphs.add(listIndex + 1, forInsert);
- _paragraphs.add(listIndex + 2, clone);
- }
- else
- {
- _paragraphs.add(listIndex, forInsert);
- }
- }
-
- }
-
- public void adjustForDelete(int listIndex, int offset, int length)
- {
- int size = _paragraphs.size();
- int endMark = offset + length;
- int endIndex = listIndex;
-
- PAPX papx = _paragraphs.get(endIndex);
- while (papx.getEnd() < endMark)
- {
- papx = _paragraphs.get(++endIndex);
- }
- if (listIndex == endIndex)
- {
- papx = _paragraphs.get(endIndex);
- papx.setEnd((papx.getEnd() - endMark) + offset);
- }
- else
- {
- papx = _paragraphs.get(listIndex);
- papx.setEnd(offset);
- for (int x = listIndex + 1; x < endIndex; x++)
- {
- papx = _paragraphs.get(x);
- papx.setStart(offset);
- papx.setEnd(offset);
- }
- papx = _paragraphs.get(endIndex);
- papx.setEnd((papx.getEnd() - endMark) + offset);
- }
-
- for (int x = endIndex + 1; x < size; x++)
- {
- papx = _paragraphs.get(x);
- papx.setStart(papx.getStart() - length);
- papx.setEnd(papx.getEnd() - length);
- }
- }
-
-
- public void adjustForInsert(int listIndex, int length)
- {
- int size = _paragraphs.size();
- PAPX papx = _paragraphs.get(listIndex);
- papx.setEnd(papx.getEnd() + length);
-
- for (int x = listIndex + 1; x < size; x++)
- {
- papx = _paragraphs.get(x);
- papx.setStart(papx.getStart() + length);
- papx.setEnd(papx.getEnd() + length);
- }
- }
-
-
- public ArrayList<PAPX> getParagraphs()
- {
- return _paragraphs;
- }
-
- @Deprecated
- public void writeTo( HWPFFileSystem sys, CharIndexTranslator translator )
- throws IOException
- {
- HWPFOutputStream wordDocumentStream = sys.getStream( "WordDocument" );
- HWPFOutputStream tableStream = sys.getStream( "1Table" );
-
- writeTo( wordDocumentStream, tableStream, translator );
- }
-
- public void writeTo( HWPFOutputStream wordDocumentStream,
- HWPFOutputStream tableStream, CharIndexTranslator translator )
- throws IOException
- {
-
- PlexOfCps binTable = new PlexOfCps(4);
-
- // each FKP must start on a 512 byte page.
- int docOffset = wordDocumentStream.getOffset();
- int mod = docOffset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
- if (mod != 0)
- {
- byte[] padding = new byte[POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod];
- wordDocumentStream.write(padding);
- }
-
- // get the page number for the first fkp
- docOffset = wordDocumentStream.getOffset();
- int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
-
- // get the ending fc
- // int endingFc = _paragraphs.get(_paragraphs.size() - 1).getEnd();
- // endingFc += fcMin;
- int endingFc = translator.getByteIndex( _paragraphs.get(
- _paragraphs.size() - 1 ).getEnd() );
-
- ArrayList<PAPX> overflow = _paragraphs;
- do
- {
- PAPX startingProp = overflow.get(0);
-
- // int start = startingProp.getStart() + fcMin;
- int start = translator.getByteIndex( startingProp.getStart() );
-
- PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage();
- pfkp.fill(overflow);
-
- byte[] bufFkp = pfkp.toByteArray(tableStream, translator);
- wordDocumentStream.write(bufFkp);
- overflow = pfkp.getOverflow();
-
- int end = endingFc;
- if (overflow != null)
- {
- // end = overflow.get(0).getStart() + fcMin;
- end = translator.getByteIndex( overflow.get( 0 ).getStart() );
- }
-
- byte[] intHolder = new byte[4];
- LittleEndian.putInt(intHolder, 0, pageNum++);
- binTable.addProperty(new GenericPropertyNode(start, end, intHolder));
-
- }
- while (overflow != null);
- tableStream.write(binTable.toByteArray());
- }
- }
|