See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-
package org.apache.poi.hwpf.model;
import java.io.IOException;
* The piece table for matching up character positions to bits of text. This
* mostly works in bytes, but the TextPieces themselves work in characters. This
* does the icky conversion.
- *
+ *
* @author Ryan Ackley
*/
-public class TextPieceTable implements CharIndexTranslator {
- protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
+public class TextPieceTable implements CharIndexTranslator
+{
+ // int _multiple;
+ int _cpMin;
+ protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
- // int _multiple;
- int _cpMin;
-
- public TextPieceTable() {
- }
-
- public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin) {
- // get our plex of PieceDescriptors
- PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size, PieceDescriptor
- .getSizeInBytes());
-
- int length = pieceTable.length();
- PieceDescriptor[] pieces = new PieceDescriptor[length];
-
- // iterate through piece descriptors raw bytes and create
- // PieceDescriptor objects
- for (int x = 0; x < length; x++) {
- GenericPropertyNode node = pieceTable.getProperty(x);
- pieces[x] = new PieceDescriptor(node.getBytes(), 0);
- }
-
- // Figure out the cp of the earliest text piece
- // Note that text pieces don't have to be stored in order!
- _cpMin = pieces[0].getFilePosition() - fcMin;
- for (int x = 0; x < pieces.length; x++) {
- int start = pieces[x].getFilePosition() - fcMin;
- if (start < _cpMin) {
- _cpMin = start;
- }
- }
-
- // using the PieceDescriptors, build our list of TextPieces.
- for (int x = 0; x < pieces.length; x++) {
- int start = pieces[x].getFilePosition();
- GenericPropertyNode node = pieceTable.getProperty(x);
-
- // Grab the start and end, which are in characters
- int nodeStartChars = node.getStart();
- int nodeEndChars = node.getEnd();
-
- // What's the relationship between bytes and characters?
- boolean unicode = pieces[x].isUnicode();
- int multiple = 1;
- if (unicode) {
- multiple = 2;
- }
-
- // Figure out the length, in bytes and chars
- int textSizeChars = (nodeEndChars - nodeStartChars);
- int textSizeBytes = textSizeChars * multiple;
-
- // Grab the data that makes up the piece
- byte[] buf = new byte[textSizeBytes];
- System.arraycopy(documentStream, start, buf, 0, textSizeBytes);
-
- // And now build the piece
- _textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node
- .getStart()));
- }
-
- // In the interest of our sanity, now sort the text pieces
- // into order, if they're not already
- Collections.sort(_textPieces);
- _textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
- Collections.sort(_textPiecesFCOrder, new FCComparator());
- }
-
- public int getCpMin() {
- return _cpMin;
- }
-
- public List<TextPiece> getTextPieces() {
- return _textPieces;
- }
-
- public void add(TextPiece piece) {
- _textPieces.add(piece);
- _textPiecesFCOrder.add(piece);
- Collections.sort(_textPieces);
- Collections.sort(_textPiecesFCOrder, new FCComparator());
+
+ /**
+ * Creates a table that holds no text pieces. Both piece lists start out
+ * empty and <code>_cpMin</code> keeps its default value; pieces can be
+ * supplied afterwards through {@link #add(TextPiece)}.
+ */
+ public TextPieceTable()
+ {
+ }
+
+    /**
+     * Builds the table from the raw document and table streams.
+     * <p>
+     * A {@link PlexOfCps} is read from <code>tableStream</code> and each of
+     * its entries is turned into a {@link PieceDescriptor}. For every
+     * descriptor the matching run of bytes is copied out of
+     * <code>documentStream</code> and wrapped in a {@link TextPiece}. Finally
+     * the pieces are sorted into their natural order, and a second list of
+     * the same pieces is sorted with {@link FCComparator}.
+     *
+     * @param documentStream
+     *            bytes the text of every piece is copied from
+     * @param tableStream
+     *            bytes the plex of piece descriptors is read from
+     * @param offset
+     *            handed to {@link PlexOfCps} together with
+     *            <code>tableStream</code> (presumably the start of the plex
+     *            within that stream)
+     * @param size
+     *            handed to {@link PlexOfCps} (presumably the size of the
+     *            plex)
+     * @param fcMin
+     *            subtracted from each descriptor's file position when
+     *            working out <code>_cpMin</code>
+     */
+    public TextPieceTable( byte[] documentStream, byte[] tableStream,
+            int offset, int size, int fcMin )
+    {
+        // Read the plex of PieceDescriptors
+        final PlexOfCps plex = new PlexOfCps( tableStream, offset, size,
+                PieceDescriptor.getSizeInBytes() );
+
+        // Turn the raw bytes of each plex entry into a PieceDescriptor
+        final int count = plex.length();
+        final PieceDescriptor[] descriptors = new PieceDescriptor[count];
+        for ( int i = 0; i < count; i++ )
+        {
+            final GenericPropertyNode entry = plex.getProperty( i );
+            descriptors[i] = new PieceDescriptor( entry.getBytes(), 0 );
+        }
+
+        // Pieces do not have to be stored in order, so every descriptor is
+        // inspected to find the smallest (file position - fcMin)
+        int earliest = descriptors[0].getFilePosition() - fcMin;
+        for ( PieceDescriptor descriptor : descriptors )
+        {
+            earliest = Math.min( earliest, descriptor.getFilePosition()
+                    - fcMin );
+        }
+        _cpMin = earliest;
+
+        // Create one TextPiece per descriptor
+        for ( int i = 0; i < count; i++ )
+        {
+            final PieceDescriptor descriptor = descriptors[i];
+            final int fileOffset = descriptor.getFilePosition();
+            final GenericPropertyNode node = plex.getProperty( i );
+
+            // Node boundaries are expressed in characters
+            final int firstChar = node.getStart();
+            final int lastChar = node.getEnd();
+
+            // Unicode pieces take two bytes per character, all others one
+            final int bytesPerChar = descriptor.isUnicode() ? 2 : 1;
+            final int byteLength = ( lastChar - firstChar ) * bytesPerChar;
+
+            // Copy this piece's bytes out of the document stream
+            final byte[] raw = new byte[byteLength];
+            System.arraycopy( documentStream, fileOffset, raw, 0, byteLength );
+
+            _textPieces.add( new TextPiece( firstChar, lastChar, raw,
+                    descriptor, node.getStart() ) );
+        }
+
+        // Keep one list in natural TextPiece order and a second list of the
+        // same pieces in FCComparator order
+        Collections.sort( _textPieces );
+        _textPiecesFCOrder = new ArrayList<TextPiece>( _textPieces );
+        Collections.sort( _textPiecesFCOrder, new FCComparator() );
+    }
+
+ /**
+ * Adds a text piece to the table. The piece is appended to both internal
+ * lists, which are then re-sorted: <code>_textPieces</code> by the natural
+ * order of {@link TextPiece} and <code>_textPiecesFCOrder</code> with
+ * {@link FCComparator}.
+ *
+ * @param piece
+ *            the piece to add
+ */
+ public void add( TextPiece piece )
+ {
+ _textPieces.add( piece );
+ _textPiecesFCOrder.add( piece );
+ // both lists are sorted again on every call so that each keeps its own
+ // ordering after the append
+ Collections.sort( _textPieces );
+ Collections.sort( _textPiecesFCOrder, new FCComparator() );
+ }
+
+ /**
+ * Adjust all the text pieces after inserting some text into one of them
+ *
+ * @param listIndex
+ * The index of the TextPiece that had characters inserted into it
+ * @param length
+ * The number of characters inserted
+ */
+ public int adjustForInsert( int listIndex, int length )
+ {
+ int size = _textPieces.size();
+
+ TextPiece tp = _textPieces.get( listIndex );
+
+ // Update with the new end
+ tp.setEnd( tp.getEnd() + length );
+
+ // Now change all subsequent ones
+ for ( int x = listIndex + 1; x < size; x++ )
+ {
+ tp = _textPieces.get( x );
+ tp.setStart( tp.getStart() + length );
+ tp.setEnd( tp.getEnd() + length );
+ }
+
+ // All done
+ return length;
}
- public byte[] writeTo(HWPFOutputStream docStream) throws IOException {
-
- PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
- // int fcMin = docStream.getOffset();
-
- int size = _textPieces.size();
- for (int x = 0; x < size; x++) {
- TextPiece next = _textPieces.get(x);
- PieceDescriptor pd = next.getPieceDescriptor();
-
- int offset = docStream.getOffset();
- int mod = (offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
- if (mod != 0) {
- mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod;
- byte[] buf = new byte[mod];
- docStream.write(buf);
- }
-
- // set the text piece position to the current docStream offset.
- pd.setFilePosition(docStream.getOffset());
-
- // write the text to the docstream and save the piece descriptor to
- // the
- // plex which will be written later to the tableStream.
- docStream.write(next.getRawBytes());
-
- // The TextPiece is already in characters, which
- // makes our life much easier
- int nodeStart = next.getStart();
- int nodeEnd = next.getEnd();
- textPlex.addProperty(new GenericPropertyNode(nodeStart, nodeEnd, pd.toByteArray()));
- }
-
- return textPlex.toByteArray();
-
- }
-
- /**
- * Adjust all the text piece after inserting some text into one of them
- *
- * @param listIndex
- * The TextPiece that had characters inserted into
- * @param length
- * The number of characters inserted
- */
- public int adjustForInsert(int listIndex, int length) {
- int size = _textPieces.size();
-
- TextPiece tp = _textPieces.get(listIndex);
-
- // Update with the new end
- tp.setEnd(tp.getEnd() + length);
-
- // Now change all subsequent ones
- for (int x = listIndex + 1; x < size; x++) {
- tp = _textPieces.get(x);
- tp.setStart(tp.getStart() + length);
- tp.setEnd(tp.getEnd() + length);
- }
-
- // All done
- return length;
- }
-
- public boolean equals(Object o) {
- TextPieceTable tpt = (TextPieceTable) o;
-
- int size = tpt._textPieces.size();
- if (size == _textPieces.size()) {
- for (int x = 0; x < size; x++) {
- if (!tpt._textPieces.get(x).equals(_textPieces.get(x))) {
- return false;
- }
- }
- return true;
- }
- return false;
- }
+    /**
+     * Compares this table with another object.
+     * <p>
+     * Two tables are equal when their <code>_textPieces</code> lists have the
+     * same size and the pieces at every position are equal. No other field
+     * takes part in the comparison.
+     *
+     * @param o
+     *            the object to compare with, may be <code>null</code>
+     * @return <code>true</code> if <code>o</code> is a
+     *         <code>TextPieceTable</code> holding equal pieces in the same
+     *         order; <code>false</code> otherwise, including when
+     *         <code>o</code> is <code>null</code> or of another type
+     */
+    @Override
+    public boolean equals( Object o )
+    {
+        if ( this == o )
+        {
+            return true;
+        }
+
+        // A blind cast would throw for null or for foreign types, but the
+        // equals contract requires a plain "false" in both cases
+        if ( !( o instanceof TextPieceTable ) )
+        {
+            return false;
+        }
+        TextPieceTable tpt = (TextPieceTable) o;
+
+        int size = tpt._textPieces.size();
+        if ( size != _textPieces.size() )
+        {
+            return false;
+        }
+
+        for ( int x = 0; x < size; x++ )
+        {
+            if ( !tpt._textPieces.get( x ).equals( _textPieces.get( x ) ) )
+            {
+                return false;
+            }
+        }
+        return true;
+    }
public int getByteIndex( int charPos )
{
return byteCount;
}
- public int getCharIndex(int bytePos) {
- return getCharIndex(bytePos, 0);
+ public int getCharIndex( int bytePos )
+ {
+ return getCharIndex( bytePos, 0 );
}
- public int getCharIndex(int bytePos, int startCP) {
+ public int getCharIndex( int startBytePos, int startCP )
+ {
int charCount = 0;
- bytePos = lookIndexForward(bytePos);
+ int bytePos = lookIndexForward( startBytePos );
- for(TextPiece tp : _textPieces) {
+ for ( TextPiece tp : _textPieces )
+ {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
int bytesLength = tp.bytesLength();
int toAdd;
- if (bytePos< pieceStart || bytePos > pieceEnd) {
+ if ( bytePos < pieceStart || bytePos > pieceEnd )
+ {
toAdd = bytesLength;
- } else if (bytePos > pieceStart && bytePos < pieceEnd) {
- toAdd = (bytePos - pieceStart);
- } else {
- toAdd = bytesLength - (pieceEnd - bytePos);
+ }
+ else if ( bytePos > pieceStart && bytePos < pieceEnd )
+ {
+ toAdd = ( bytePos - pieceStart );
+ }
+ else
+ {
+ toAdd = bytesLength - ( pieceEnd - bytePos );
}
- if (tp.isUnicode()) {
+ if ( tp.isUnicode() )
+ {
charCount += toAdd / 2;
- } else {
+ }
+ else
+ {
charCount += toAdd;
}
- if (bytePos>=pieceStart && bytePos<=pieceEnd && charCount>=startCP) {
+ if ( bytePos >= pieceStart && bytePos <= pieceEnd
+ && charCount >= startCP )
+ {
break;
}
}
return charCount;
}
- public int lookIndexForward(int bytePos) {
- for(TextPiece tp : _textPiecesFCOrder) {
- int pieceStart = tp.getPieceDescriptor().getFilePosition();
+ /**
+ * Returns the value of <code>_cpMin</code>. The stream-based constructor
+ * sets it to the smallest (file position - fcMin) found among the piece
+ * descriptors.
+ *
+ * @return the current <code>_cpMin</code>
+ */
+ public int getCpMin()
+ {
+ return _cpMin;
+ }
+
+ /**
+ * Returns the list of text pieces held by this table. The internal list
+ * itself is returned, not a copy, so changes made through the returned
+ * reference are seen by this table.
+ *
+ * @return the <code>_textPieces</code> list
+ */
+ public List<TextPiece> getTextPieces()
+ {
+ return _textPieces;
+ }
+
+ /**
+ * Returns the number of text pieces as the hash code. Tables that
+ * {@link #equals(Object)} reports as equal always have the same number of
+ * pieces, so they share a hash code; tables with different content but the
+ * same piece count collide.
+ *
+ * @return the size of <code>_textPieces</code>
+ */
+ @Override
+ public int hashCode()
+ {
+ return _textPieces.size();
+ }
+
+ public boolean isIndexInTable( int bytePos )
+ {
+ for ( TextPiece tp : _textPiecesFCOrder )
+ {
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if (bytePos >= pieceStart + tp.bytesLength()) {
+ if ( bytePos > pieceStart + tp.bytesLength() )
+ {
continue;
}
- if (pieceStart > bytePos) {
- bytePos = pieceStart;
- }
+ if ( pieceStart > bytePos )
+ {
+ return false;
+ }
- break;
+ return true;
}
- return bytePos;
+
+ return false;
+ }
+
+ boolean isIndexInTable( int startBytePos, int endBytePos )
+ {
+ for ( TextPiece tp : _textPiecesFCOrder )
+ {
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+ if ( startBytePos >= pieceStart + tp.bytesLength() )
+ {
+ continue;
+ }
+
+ int left = Math.max( startBytePos, pieceStart );
+ int right = Math.min( endBytePos, pieceStart + tp.bytesLength() );
+
+ if ( left >= right )
+ return false;
+
+ return true;
+ }
+
+ return false;
}
- public int lookIndexBackward(int bytePos) {
+ public int lookIndexBackward( final int startBytePos )
+ {
+ int bytePos = startBytePos;
int lastEnd = 0;
- for(TextPiece tp : _textPiecesFCOrder) {
- int pieceStart = tp.getPieceDescriptor().getFilePosition();
+ for ( TextPiece tp : _textPiecesFCOrder )
+ {
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if (bytePos > pieceStart + tp.bytesLength()) {
+ if ( bytePos > pieceStart + tp.bytesLength() )
+ {
lastEnd = pieceStart + tp.bytesLength();
continue;
}
- if (pieceStart > bytePos) {
- bytePos = lastEnd;
- }
+ if ( pieceStart > bytePos )
+ {
+ bytePos = lastEnd;
+ }
break;
}
return bytePos;
}
- public boolean isIndexInTable(int bytePos) {
- for(TextPiece tp : _textPiecesFCOrder) {
- int pieceStart = tp.getPieceDescriptor().getFilePosition();
+ public int lookIndexForward( final int startBytePos )
+ {
+ int bytePos = startBytePos;
+ for ( TextPiece tp : _textPiecesFCOrder )
+ {
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if (bytePos > pieceStart + tp.bytesLength()) {
+ if ( bytePos >= pieceStart + tp.bytesLength() )
+ {
continue;
}
- if (pieceStart > bytePos) {
- return false;
- }
+ if ( pieceStart > bytePos )
+ {
+ bytePos = pieceStart;
+ }
- return true;
+ break;
}
-
- return false;
+ return bytePos;
}
- boolean isIndexInTable( int startBytePos, int endBytePos )
+ public byte[] writeTo( HWPFOutputStream docStream ) throws IOException
{
- for(TextPiece tp : _textPiecesFCOrder) {
- int pieceStart = tp.getPieceDescriptor().getFilePosition();
+ PlexOfCps textPlex = new PlexOfCps( PieceDescriptor.getSizeInBytes() );
+ // int fcMin = docStream.getOffset();
- if (startBytePos >= pieceStart + tp.bytesLength()) {
- continue;
+ int size = _textPieces.size();
+ for ( int x = 0; x < size; x++ )
+ {
+ TextPiece next = _textPieces.get( x );
+ PieceDescriptor pd = next.getPieceDescriptor();
+
+ int offset = docStream.getOffset();
+ int mod = ( offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE );
+ if ( mod != 0 )
+ {
+ mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod;
+ byte[] buf = new byte[mod];
+ docStream.write( buf );
}
- int left = Math.max( startBytePos, pieceStart );
- int right = Math.min( endBytePos, pieceStart + tp.bytesLength() );
+ // set the text piece position to the current docStream offset.
+ pd.setFilePosition( docStream.getOffset() );
- if (left >= right)
- return false;
+ // write the text to the docstream and save the piece descriptor to
+ // the
+ // plex which will be written later to the tableStream.
+ docStream.write( next.getRawBytes() );
- return true;
+ // The TextPiece is already in characters, which
+ // makes our life much easier
+ int nodeStart = next.getStart();
+ int nodeEnd = next.getEnd();
+ textPlex.addProperty( new GenericPropertyNode( nodeStart, nodeEnd,
+ pd.toByteArray() ) );
}
- return false;
+ return textPlex.toByteArray();
}
- private static class FCComparator implements Comparator<TextPiece> {
- public int compare(TextPiece textPiece, TextPiece textPiece1) {
- if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
+ private static class FCComparator implements Comparator<TextPiece>
+ {
+ public int compare( TextPiece textPiece, TextPiece textPiece1 )
+ {
+ if ( textPiece.getPieceDescriptor().fc > textPiece1
+ .getPieceDescriptor().fc )
+ {
return 1;
- } else if (textPiece.getPieceDescriptor().fc<textPiece1.getPieceDescriptor().fc) {
+ }
+ else if ( textPiece.getPieceDescriptor().fc < textPiece1
+ .getPieceDescriptor().fc )
+ {
return -1;
- } else {
+ }
+ else
+ {
return 0;
}
}