123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- /* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- ==================================================================== */
- package org.apache.poi.hwpf.model;
-
- import java.nio.charset.Charset;
- import java.util.ArrayList;
- import java.util.Collections;
-
- import org.apache.poi.util.CodePageUtil;
- import org.apache.poi.util.Internal;
- import org.apache.poi.util.POILogFactory;
- import org.apache.poi.util.POILogger;
-
-
- @Internal
- public class OldTextPieceTable extends TextPieceTable {
-
- private static final POILogger logger = POILogFactory
- .getLogger(OldTextPieceTable.class);
-
- public OldTextPieceTable() {
- super();
- }
-
- public OldTextPieceTable(byte[] documentStream, byte[] tableStream,
- int offset, int size, int fcMin, Charset charset) {
- //super(documentStream, tableStream, offset, size, fcMin, charset);
- // get our plex of PieceDescriptors
- PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size,
- PieceDescriptor.getSizeInBytes());
-
- int length = pieceTable.length();
- PieceDescriptor[] pieces = new PieceDescriptor[length];
-
- // iterate through piece descriptors raw bytes and create
- // PieceDescriptor objects
- for (int x = 0; x < length; x++) {
- GenericPropertyNode node = pieceTable.getProperty(x);
- pieces[x] = new PieceDescriptor(node.getBytes(), 0, charset);
- }
-
- // Figure out the cp of the earliest text piece
- // Note that text pieces don't have to be stored in order!
- _cpMin = pieces[0].getFilePosition() - fcMin;
- for (PieceDescriptor piece : pieces) {
- int start = piece.getFilePosition() - fcMin;
- if (start < _cpMin) {
- _cpMin = start;
- }
- }
-
- // using the PieceDescriptors, build our list of TextPieces.
- for (int x = 0; x < pieces.length; x++) {
- int start = pieces[x].getFilePosition();
- GenericPropertyNode node = pieceTable.getProperty(x);
-
- // Grab the start and end, which are in characters
- int nodeStartChars = node.getStart();
- int nodeEndChars = node.getEnd();
-
- // What's the relationship between bytes and characters?
- boolean unicode = pieces[x].isUnicode();
- int multiple = 1;
- if (unicode ||
- (charset != null && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(charset))) {
- multiple = 2;
- }
-
- // Figure out the length, in bytes and chars
- int textSizeChars = (nodeEndChars - nodeStartChars);
- int textSizeBytes = textSizeChars * multiple;
-
- // Grab the data that makes up the piece
- byte[] buf = new byte[textSizeBytes];
- System.arraycopy(documentStream, start, buf, 0, textSizeBytes);
-
- // And now build the piece
- final TextPiece newTextPiece = newTextPiece(nodeStartChars, nodeEndChars, buf,
- pieces[x]);
-
- _textPieces.add(newTextPiece);
- }
-
- // In the interest of our sanity, now sort the text pieces
- // into order, if they're not already
- Collections.sort(_textPieces);
- _textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
- Collections.sort(_textPiecesFCOrder, new FCComparator());
-
- }
-
- @Override
- protected TextPiece newTextPiece(int nodeStartChars, int nodeEndChars, byte[] buf, PieceDescriptor pd) {
- return new OldTextPiece(nodeStartChars, nodeEndChars, buf, pd);
- }
-
- @Override
- protected int getEncodingMultiplier(TextPiece textPiece) {
- Charset charset = textPiece.getPieceDescriptor().getCharset();
- if (charset != null && CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(charset)) {
- return 2;
- }
- return 1;
- }
- }
|