123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
-
- package org.apache.poi.hwpf.model;
-
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.List;
-
- import org.apache.poi.hwpf.model.io.HWPFFileSystem;
- import org.apache.poi.util.IOUtils;
- import org.apache.poi.util.Internal;
- import org.apache.poi.util.LittleEndian;
- import org.apache.poi.util.LittleEndianConsts;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
- @Internal
- public class SectionTable
- {
-
- //arbitrarily selected; may need to increase
- private static final int MAX_RECORD_LENGTH = 100_000;
-
- private final static POILogger _logger = POILogFactory.getLogger(SectionTable.class);
- private static final int SED_SIZE = 12;
-
- protected List<SEPX> _sections = new ArrayList<>();
- protected List<TextPiece> _text;
-
- /** So we can know if things are unicode or not */
- //private TextPieceTable tpt;
-
- public SectionTable()
- {
- }
-
-
- public SectionTable(
- byte[] documentStream, byte[] tableStream,
- int offset, int size, int fcMin, TextPieceTable tpt, int mainLength)
- {
- PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
- //this.tpt = tpt;
- this._text = tpt.getTextPieces();
-
- int length = sedPlex.length();
-
- for (int x = 0; x < length; x++)
- {
- GenericPropertyNode node = sedPlex.getProperty(x);
- SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
-
- int fileOffset = sed.getFc();
- // int startAt = CPtoFC(node.getStart());
- // int endAt = CPtoFC(node.getEnd());
- int startAt = node.getStart();
- int endAt = node.getEnd();
-
- // check for the optimization
- if (fileOffset == 0xffffffff)
- {
- _sections.add(new SEPX(sed, startAt, endAt, new byte[0]));
- }
- else
- {
- // The first short at the offset is the size of the grpprl.
- int sepxSize = LittleEndian.getShort(documentStream, fileOffset);
- byte[] buf = IOUtils.safelyAllocate(sepxSize, MAX_RECORD_LENGTH);
- fileOffset += LittleEndianConsts.SHORT_SIZE;
- System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
- _sections.add(new SEPX(sed, startAt, endAt, buf));
- }
- }
-
- // Some files seem to lie about their unicode status, which
- // is very very pesky. Try to work around these, but this
- // is getting on for black magic...
- boolean matchAt = false;
- boolean matchHalf = false;
- for (SEPX s : _sections) {
- if (s.getEnd() == mainLength) {
- matchAt = true;
- } else if (s.getEnd() == mainLength || s.getEnd() == mainLength - 1) {
- matchHalf = true;
- }
- }
- if(! matchAt && matchHalf) {
- _logger.log(POILogger.WARN, "Your document seemed to be mostly unicode, but the section definition was in bytes! Trying anyway, but things may well go wrong!");
- for(int i=0; i<_sections.size(); i++) {
- SEPX s = _sections.get(i);
- GenericPropertyNode node = sedPlex.getProperty(i);
-
- // s.setStart( CPtoFC(node.getStart()) );
- // s.setEnd( CPtoFC(node.getEnd()) );
- int startAt = node.getStart();
- int endAt = node.getEnd();
- s.setStart( startAt );
- s.setEnd( endAt );
- }
- }
-
- _sections.sort(PropertyNode.StartComparator);
- }
-
- public void adjustForInsert(int listIndex, int length)
- {
- int size = _sections.size();
- SEPX sepx = _sections.get(listIndex);
- sepx.setEnd(sepx.getEnd() + length);
-
- for (int x = listIndex + 1; x < size; x++)
- {
- sepx = _sections.get(x);
- sepx.setStart(sepx.getStart() + length);
- sepx.setEnd(sepx.getEnd() + length);
- }
- }
-
- // goss version of CPtoFC - this takes into account non-contiguous textpieces
- // that we have come across in real world documents. Tests against the example
- // code in HWPFDocument show no variation to Ryan's version of the code in
- // normal use, but this version works with our non-contiguous test case.
- // So far unable to get this test case to be written out as well due to
- // other issues. - piers
- //
- // i'm commenting this out, because it just doesn't work with non-contiguous
- // textpieces :( Usual (as for PAPX and CHPX) call to TextPiecesTable does.
- // private int CPtoFC(int CP)
- // {
- // TextPiece TP = null;
- //
- // for(int i=_text.size()-1; i>-1; i--)
- // {
- // TP = _text.get(i);
- //
- // if(CP >= TP.getCP()) break;
- // }
- // int FC = TP.getPieceDescriptor().getFilePosition();
- // int offset = CP - TP.getCP();
- // if (TP.isUnicode()) {
- // offset = offset*2;
- // }
- // FC = FC+offset;
- // return FC;
- // }
-
- public List<SEPX> getSections()
- {
- return _sections;
- }
-
- @Deprecated
- public void writeTo( HWPFFileSystem sys, int fcMin ) throws IOException
- {
- ByteArrayOutputStream docStream = sys.getStream( "WordDocument" );
- ByteArrayOutputStream tableStream = sys.getStream( "1Table" );
-
- writeTo( docStream, tableStream );
- }
-
- public void writeTo(
- ByteArrayOutputStream wordDocumentStream,
- ByteArrayOutputStream tableStream ) throws IOException
- {
-
- int offset = wordDocumentStream.size();
- //int len = _sections.size();
- PlexOfCps plex = new PlexOfCps(SED_SIZE);
-
- for (SEPX sepx : _sections) {
- byte[] grpprl = sepx.getGrpprl();
-
- // write the sepx to the document stream. starts with a 2 byte size
- // followed by the grpprl
- byte[] shortBuf = new byte[2];
- LittleEndian.putShort(shortBuf, 0, (short) grpprl.length);
-
- wordDocumentStream.write(shortBuf);
- wordDocumentStream.write(grpprl);
-
- // set the fc in the section descriptor
- SectionDescriptor sed = sepx.getSectionDescriptor();
- sed.setFc(offset);
-
- // add the section descriptor bytes to the PlexOfCps.
-
- /* original line */
- GenericPropertyNode property = new GenericPropertyNode(
- sepx.getStart(), sepx.getEnd(), sed.toByteArray());
- /*
- * Line using Ryan's FCtoCP() conversion method - unable to observe
- * any effect on our testcases when using this code - piers
- */
- /*
- * there is an effect on Bug45743.doc actually. writeoutreadback
- * changes byte offset of chars (but preserve string offsets).
- * Changing back to original lines - sergey
- */
- // GenericPropertyNode property = new GenericPropertyNode(
- // tpt.getCharIndex( sepx.getStartBytes() ),
- // tpt.getCharIndex( sepx.getEndBytes() ), sed.toByteArray() );
-
- plex.addProperty(property);
-
- offset = wordDocumentStream.size();
- }
- tableStream.write(plex.toByteArray());
- }
- }
|