* The piece table for matching up character positions to bits of text. This
* mostly works in bytes, but the TextPieces themselves work in characters. This
* does the icky conversion.
- *
+ *
* @author Ryan Ackley
*/
@Internal
-public class TextPieceTable implements CharIndexTranslator
-{
+public class TextPieceTable implements CharIndexTranslator {
private static final POILogger logger = POILogFactory
- .getLogger( TextPieceTable.class );
+ .getLogger(TextPieceTable.class);
// int _multiple;
int _cpMin;
protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
- public TextPieceTable()
- {
+ public TextPieceTable() {
}
- public TextPieceTable( byte[] documentStream, byte[] tableStream,
- int offset, int size, int fcMin )
- {
+ public TextPieceTable(byte[] documentStream, byte[] tableStream,
+ int offset, int size, int fcMin) {
// get our plex of PieceDescriptors
- PlexOfCps pieceTable = new PlexOfCps( tableStream, offset, size,
- PieceDescriptor.getSizeInBytes() );
+ PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size,
+ PieceDescriptor.getSizeInBytes());
int length = pieceTable.length();
PieceDescriptor[] pieces = new PieceDescriptor[length];
// iterate through piece descriptors raw bytes and create
// PieceDescriptor objects
- for ( int x = 0; x < length; x++ )
- {
- GenericPropertyNode node = pieceTable.getProperty( x );
- pieces[x] = new PieceDescriptor( node.getBytes(), 0 );
+ for (int x = 0; x < length; x++) {
+ GenericPropertyNode node = pieceTable.getProperty(x);
+ pieces[x] = new PieceDescriptor(node.getBytes(), 0);
}
// Figure out the cp of the earliest text piece
// Note that text pieces don't have to be stored in order!
_cpMin = pieces[0].getFilePosition() - fcMin;
- for ( int x = 0; x < pieces.length; x++ )
- {
- int start = pieces[x].getFilePosition() - fcMin;
- if ( start < _cpMin )
- {
+ for (PieceDescriptor piece : pieces) {
+ int start = piece.getFilePosition() - fcMin;
+ if (start < _cpMin) {
_cpMin = start;
}
}
// using the PieceDescriptors, build our list of TextPieces.
- for ( int x = 0; x < pieces.length; x++ )
- {
+ for (int x = 0; x < pieces.length; x++) {
int start = pieces[x].getFilePosition();
- GenericPropertyNode node = pieceTable.getProperty( x );
+ GenericPropertyNode node = pieceTable.getProperty(x);
// Grab the start and end, which are in characters
int nodeStartChars = node.getStart();
// What's the relationship between bytes and characters?
boolean unicode = pieces[x].isUnicode();
int multiple = 1;
- if ( unicode )
- {
+ if (unicode) {
multiple = 2;
}
// Figure out the length, in bytes and chars
- int textSizeChars = ( nodeEndChars - nodeStartChars );
+ int textSizeChars = (nodeEndChars - nodeStartChars);
int textSizeBytes = textSizeChars * multiple;
// Grab the data that makes up the piece
byte[] buf = new byte[textSizeBytes];
- System.arraycopy( documentStream, start, buf, 0, textSizeBytes );
+ System.arraycopy(documentStream, start, buf, 0, textSizeBytes);
// And now build the piece
- final TextPiece newTextPiece = new TextPiece( nodeStartChars, nodeEndChars, buf,
- pieces[x] );
+ final TextPiece newTextPiece = new TextPiece(nodeStartChars, nodeEndChars, buf,
+ pieces[x]);
- _textPieces.add( newTextPiece );
+ _textPieces.add(newTextPiece);
}
// In the interest of our sanity, now sort the text pieces
// into order, if they're not already
- Collections.sort( _textPieces );
- _textPiecesFCOrder = new ArrayList<TextPiece>( _textPieces );
- Collections.sort( _textPiecesFCOrder, new FCComparator() );
+ Collections.sort(_textPieces);
+ _textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
+ Collections.sort(_textPiecesFCOrder, new FCComparator());
}
- public void add( TextPiece piece )
- {
- _textPieces.add( piece );
- _textPiecesFCOrder.add( piece );
- Collections.sort( _textPieces );
- Collections.sort( _textPiecesFCOrder, new FCComparator() );
+ public void add(TextPiece piece) {
+ _textPieces.add(piece);
+ _textPiecesFCOrder.add(piece);
+ Collections.sort(_textPieces);
+ Collections.sort(_textPiecesFCOrder, new FCComparator());
}
/**
* Adjust all the text pieces after inserting some text into one of them
- *
- * @param listIndex
- * The TextPiece that had characters inserted into
- * @param length
- * The number of characters inserted
+ *
+ * @param listIndex The TextPiece that had characters inserted into
+ * @param length The number of characters inserted
*/
- public int adjustForInsert( int listIndex, int length )
- {
+ public int adjustForInsert(int listIndex, int length) {
int size = _textPieces.size();
- TextPiece tp = _textPieces.get( listIndex );
+ TextPiece tp = _textPieces.get(listIndex);
// Update with the new end
- tp.setEnd( tp.getEnd() + length );
+ tp.setEnd(tp.getEnd() + length);
// Now change all subsequent ones
- for ( int x = listIndex + 1; x < size; x++ )
- {
- tp = _textPieces.get( x );
- tp.setStart( tp.getStart() + length );
- tp.setEnd( tp.getEnd() + length );
+ for (int x = listIndex + 1; x < size; x++) {
+ tp = _textPieces.get(x);
+ tp.setStart(tp.getStart() + length);
+ tp.setEnd(tp.getEnd() + length);
}
// All done
return length;
}
- public boolean equals( Object o )
- {
+ public boolean equals(Object o) {
if (!(o instanceof TextPieceTable)) return false;
TextPieceTable tpt = (TextPieceTable) o;
int size = tpt._textPieces.size();
- if ( size == _textPieces.size() )
- {
- for ( int x = 0; x < size; x++ )
- {
- if ( !tpt._textPieces.get( x ).equals( _textPieces.get( x ) ) )
- {
+ if (size == _textPieces.size()) {
+ for (int x = 0; x < size; x++) {
+ if (!tpt._textPieces.get(x).equals(_textPieces.get(x))) {
return false;
}
}
return false;
}
- public int getByteIndex( int charPos )
- {
+ public int getByteIndex(int charPos) {
int byteCount = 0;
- for ( TextPiece tp : _textPieces )
- {
- if ( charPos >= tp.getEnd() )
- {
+ for (TextPiece tp : _textPieces) {
+ if (charPos >= tp.getEnd()) {
byteCount = tp.getPieceDescriptor().getFilePosition()
- + ( tp.getEnd() - tp.getStart() )
- * ( tp.isUnicode() ? 2 : 1 );
+ + (tp.getEnd() - tp.getStart())
+ * (tp.isUnicode() ? 2 : 1);
- if ( charPos == tp.getEnd() )
+ if (charPos == tp.getEnd())
break;
continue;
}
- if ( charPos < tp.getEnd() )
- {
+ if (charPos < tp.getEnd()) {
int left = charPos - tp.getStart();
byteCount = tp.getPieceDescriptor().getFilePosition() + left
- * ( tp.isUnicode() ? 2 : 1 );
+ * (tp.isUnicode() ? 2 : 1);
break;
}
}
}
@Deprecated
- public int getCharIndex( int bytePos )
- {
- return getCharIndex( bytePos, 0 );
+ public int getCharIndex(int bytePos) {
+ return getCharIndex(bytePos, 0);
}
@Deprecated
- public int getCharIndex( int startBytePos, int startCP )
- {
+ public int getCharIndex(int startBytePos, int startCP) {
int charCount = 0;
- int bytePos = lookIndexForward( startBytePos );
+ int bytePos = lookIndexForward(startBytePos);
- for ( TextPiece tp : _textPieces )
- {
+ for (TextPiece tp : _textPieces) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
int bytesLength = tp.bytesLength();
int toAdd;
- if ( bytePos < pieceStart || bytePos > pieceEnd )
- {
+ if (bytePos < pieceStart || bytePos > pieceEnd) {
toAdd = bytesLength;
- }
- else if ( bytePos > pieceStart && bytePos < pieceEnd )
- {
- toAdd = ( bytePos - pieceStart );
- }
- else
- {
- toAdd = bytesLength - ( pieceEnd - bytePos );
+ } else if (bytePos > pieceStart && bytePos < pieceEnd) {
+ toAdd = (bytePos - pieceStart);
+ } else {
+ toAdd = bytesLength - (pieceEnd - bytePos);
}
- if ( tp.isUnicode() )
- {
+ if (tp.isUnicode()) {
charCount += toAdd / 2;
- }
- else
- {
+ } else {
charCount += toAdd;
}
- if ( bytePos >= pieceStart && bytePos <= pieceEnd
- && charCount >= startCP )
- {
+ if (bytePos >= pieceStart && bytePos <= pieceEnd
+ && charCount >= startCP) {
break;
}
}
}
@Override
- public int[][] getCharIndexRanges( int startBytePosInclusive,
- int endBytePosExclusive )
- {
+ public int[][] getCharIndexRanges(int startBytePosInclusive,
+ int endBytePosExclusive) {
List<int[]> result = new LinkedList<int[]>();
- for ( TextPiece textPiece : _textPiecesFCOrder )
- {
+ for (TextPiece textPiece : _textPiecesFCOrder) {
final int tpStart = textPiece.getPieceDescriptor()
.getFilePosition();
final int tpEnd = textPiece.getPieceDescriptor().getFilePosition()
+ textPiece.bytesLength();
- if ( startBytePosInclusive > tpEnd )
+ if (startBytePosInclusive > tpEnd)
continue;
- if ( endBytePosExclusive <= tpStart )
+ if (endBytePosExclusive <= tpStart)
break;
- final int rangeStartBytes = Math.max( tpStart,
- startBytePosInclusive );
- final int rangeEndBytes = Math.min( tpEnd, endBytePosExclusive );
+ final int rangeStartBytes = Math.max(tpStart,
+ startBytePosInclusive);
+ final int rangeEndBytes = Math.min(tpEnd, endBytePosExclusive);
final int rangeLengthBytes = rangeEndBytes - rangeStartBytes;
- if ( rangeStartBytes > rangeEndBytes )
+ if (rangeStartBytes > rangeEndBytes)
continue;
final int encodingMultiplier = textPiece.isUnicode() ? 2 : 1;
final int rangeStartCp = textPiece.getStart()
- + ( rangeStartBytes - tpStart ) / encodingMultiplier;
+ + (rangeStartBytes - tpStart) / encodingMultiplier;
final int rangeEndCp = rangeStartCp + rangeLengthBytes
/ encodingMultiplier;
- result.add( new int[] { rangeStartCp, rangeEndCp } );
+ result.add(new int[]{rangeStartCp, rangeEndCp});
}
- return result.toArray( new int[result.size()][] );
+ return result.toArray(new int[result.size()][]);
}
- public int getCpMin()
- {
+ public int getCpMin() {
return _cpMin;
}
- public StringBuilder getText()
- {
+ public StringBuilder getText() {
final long start = System.currentTimeMillis();
// rebuild document paragraphs structure
StringBuilder docText = new StringBuilder();
- for ( TextPiece textPiece : _textPieces )
- {
+ for (TextPiece textPiece : _textPieces) {
String toAppend = textPiece.getStringBuilder().toString();
int toAppendLength = toAppend.length();
- if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
- {
+ if (toAppendLength != textPiece.getEnd() - textPiece.getStart()) {
logger.log(
POILogger.WARN,
"Text piece has boundaries [",
- Integer.valueOf( textPiece.getStart() ),
+ Integer.valueOf(textPiece.getStart()),
"; ",
- Integer.valueOf( textPiece.getEnd() ),
+ Integer.valueOf(textPiece.getEnd()),
") but length ",
- Integer.valueOf( textPiece.getEnd()
- - textPiece.getStart() ) );
+ Integer.valueOf(textPiece.getEnd()
+ - textPiece.getStart()));
}
- docText.replace( textPiece.getStart(), textPiece.getStart()
- + toAppendLength, toAppend );
+ docText.replace(textPiece.getStart(), textPiece.getStart()
+ + toAppendLength, toAppend);
}
- logger.log( POILogger.DEBUG, "Document text were rebuilded in ",
- Long.valueOf( System.currentTimeMillis() - start ), " ms (",
- Integer.valueOf( docText.length() ), " chars)" );
+ logger.log(POILogger.DEBUG, "Document text were rebuilded in ",
+ Long.valueOf(System.currentTimeMillis() - start), " ms (",
+ Integer.valueOf(docText.length()), " chars)");
return docText;
}
- public List<TextPiece> getTextPieces()
- {
+ public List<TextPiece> getTextPieces() {
return _textPieces;
}
@Override
- public int hashCode()
- {
+ public int hashCode() {
return _textPieces.size();
}
- public boolean isIndexInTable( int bytePos )
- {
- for ( TextPiece tp : _textPiecesFCOrder )
- {
+ public boolean isIndexInTable(int bytePos) {
+ for (TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if ( bytePos > pieceStart + tp.bytesLength() )
- {
+ if (bytePos > pieceStart + tp.bytesLength()) {
continue;
}
- if ( pieceStart > bytePos )
- {
+ if (pieceStart > bytePos) {
return false;
}
return false;
}
- boolean isIndexInTable( int startBytePos, int endBytePos )
- {
- for ( TextPiece tp : _textPiecesFCOrder )
- {
+ boolean isIndexInTable(int startBytePos, int endBytePos) {
+ for (TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if ( startBytePos >= pieceStart + tp.bytesLength() )
- {
+ if (startBytePos >= pieceStart + tp.bytesLength()) {
continue;
}
- int left = Math.max( startBytePos, pieceStart );
- int right = Math.min( endBytePos, pieceStart + tp.bytesLength() );
+ int left = Math.max(startBytePos, pieceStart);
+ int right = Math.min(endBytePos, pieceStart + tp.bytesLength());
- if ( left >= right )
+ if (left >= right)
return false;
return true;
return false;
}
- public int lookIndexBackward( final int startBytePos )
- {
+ public int lookIndexBackward(final int startBytePos) {
int bytePos = startBytePos;
int lastEnd = 0;
- for ( TextPiece tp : _textPiecesFCOrder )
- {
+ for (TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
- if ( bytePos > pieceStart + tp.bytesLength() )
- {
+ if (bytePos > pieceStart + tp.bytesLength()) {
lastEnd = pieceStart + tp.bytesLength();
continue;
}
- if ( pieceStart > bytePos )
- {
+ if (pieceStart > bytePos) {
bytePos = lastEnd;
}
return bytePos;
}
- public int lookIndexForward( final int startBytePos )
- {
- if ( _textPiecesFCOrder.isEmpty() )
- throw new IllegalStateException( "Text pieces table is empty" );
+ public int lookIndexForward(final int startBytePos) {
+ if (_textPiecesFCOrder.isEmpty())
+ throw new IllegalStateException("Text pieces table is empty");
- if ( _textPiecesFCOrder.get( 0 ).getPieceDescriptor().getFilePosition() > startBytePos )
- return _textPiecesFCOrder.get( 0 ).getPieceDescriptor().getFilePosition();
+ if (_textPiecesFCOrder.get(0).getPieceDescriptor().getFilePosition() > startBytePos)
+ return _textPiecesFCOrder.get(0).getPieceDescriptor().getFilePosition();
- if ( _textPiecesFCOrder.get( _textPiecesFCOrder.size() - 1 )
- .getPieceDescriptor().getFilePosition() <= startBytePos )
+ if (_textPiecesFCOrder.get(_textPiecesFCOrder.size() - 1)
+ .getPieceDescriptor().getFilePosition() <= startBytePos)
return startBytePos;
int low = 0;
int high = _textPiecesFCOrder.size() - 1;
- while ( low <= high )
- {
- int mid = ( low + high ) >>> 1;
- final TextPiece textPiece = _textPiecesFCOrder.get( mid );
+ while (low <= high) {
+ int mid = (low + high) >>> 1;
+ final TextPiece textPiece = _textPiecesFCOrder.get(mid);
int midVal = textPiece.getPieceDescriptor().getFilePosition();
- if ( midVal < startBytePos )
+ if (midVal < startBytePos)
low = mid + 1;
- else if ( midVal > startBytePos )
+ else if (midVal > startBytePos)
high = mid - 1;
else
// found piece with exact start
return textPiece.getPieceDescriptor().getFilePosition();
}
assert low == high;
- assert _textPiecesFCOrder.get( low ).getPieceDescriptor()
+ assert _textPiecesFCOrder.get(low).getPieceDescriptor()
.getFilePosition() < startBytePos;
// last line can't be current, can it?
- assert _textPiecesFCOrder.get( low + 1 ).getPieceDescriptor()
+ assert _textPiecesFCOrder.get(low + 1).getPieceDescriptor()
.getFilePosition() > startBytePos;
// shifting to next piece start
- return _textPiecesFCOrder.get( low + 1 ).getPieceDescriptor().getFilePosition();
+ return _textPiecesFCOrder.get(low + 1).getPieceDescriptor().getFilePosition();
}
- public byte[] writeTo( HWPFOutputStream docStream ) throws IOException
- {
- PlexOfCps textPlex = new PlexOfCps( PieceDescriptor.getSizeInBytes() );
+ public byte[] writeTo(HWPFOutputStream docStream) throws IOException {
+ PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
// int fcMin = docStream.getOffset();
- int size = _textPieces.size();
- for ( int x = 0; x < size; x++ )
- {
- TextPiece next = _textPieces.get( x );
+ for (TextPiece next : _textPieces) {
PieceDescriptor pd = next.getPieceDescriptor();
int offset = docStream.getOffset();
- int mod = ( offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE );
- if ( mod != 0 )
- {
+ int mod = (offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
+ if (mod != 0) {
mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod;
byte[] buf = new byte[mod];
- docStream.write( buf );
+ docStream.write(buf);
}
// set the text piece position to the current docStream offset.
- pd.setFilePosition( docStream.getOffset() );
+ pd.setFilePosition(docStream.getOffset());
// write the text to the docstream and save the piece descriptor to
// the plex, which will be written later to the tableStream.
- docStream.write( next.getRawBytes() );
+ docStream.write(next.getRawBytes());
// The TextPiece is already in characters, which
// makes our life much easier
int nodeStart = next.getStart();
int nodeEnd = next.getEnd();
- textPlex.addProperty( new GenericPropertyNode( nodeStart, nodeEnd,
- pd.toByteArray() ) );
+ textPlex.addProperty(new GenericPropertyNode(nodeStart, nodeEnd,
+ pd.toByteArray()));
}
return textPlex.toByteArray();
}
- private static class FCComparator implements Comparator<TextPiece>, Serializable
- {
- public int compare( TextPiece textPiece, TextPiece textPiece1 )
- {
- if ( textPiece.getPieceDescriptor().fc > textPiece1
- .getPieceDescriptor().fc )
- {
+ private static class FCComparator implements Comparator<TextPiece>, Serializable {
+ public int compare(TextPiece textPiece, TextPiece textPiece1) {
+ if (textPiece.getPieceDescriptor().fc > textPiece1
+ .getPieceDescriptor().fc) {
return 1;
- }
- else if ( textPiece.getPieceDescriptor().fc < textPiece1
- .getPieceDescriptor().fc )
- {
+ } else if (textPiece.getPieceDescriptor().fc < textPiece1
+ .getPieceDescriptor().fc) {
return -1;
- }
- else
- {
+ } else {
return 0;
}
}