From: Sergey Vladimirov Date: Thu, 7 Jul 2011 10:39:27 +0000 (+0000) Subject: fix test case for 45473: calculate PAPX boundaries basing on char positions, not... X-Git-Tag: REL_3_8_BETA4~288 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=5911ff3bdb6c5fe5b2cf65ba9b9f4c2e34322791;p=poi.git fix test case for 45473: calculate PAPX boundaries basing on char positions, not on previously read byte positions (they are outdated); fix boundaries checks (again) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143753 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java index 2116864143..2ef8b55b0e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java @@ -18,6 +18,15 @@ package org.apache.poi.hwpf.model; public interface CharIndexTranslator { + /** + * Calculates the byte index of the given char index. + * + * @param charPos + * The char position + * @return The byte index + */ + int getByteIndex( int charPos ); + /** * Calculates the char index of the given byte index. 
* Look forward if index is not in table @@ -36,7 +45,7 @@ public interface CharIndexTranslator { * @return the char index */ int getCharIndex(int bytePos, int startCP); - + /** * Check if index is in table * diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java index 87218b35e5..8cc0c5b303 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java @@ -74,6 +74,11 @@ public final class OldSectionTable extends SectionTable this.tpt = tpt; } + public int getByteIndex( int charPos ) + { + return charPos; + } + public int getCharIndex(int bytePos, int startCP) { return bytePos; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 9d0226e759..4ca1087cca 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -17,13 +17,13 @@ package org.apache.poi.hwpf.model; -import java.util.ArrayList; import java.io.IOException; import java.io.OutputStream; +import java.util.ArrayList; -import org.apache.poi.hwpf.model.io.*; +import org.apache.poi.hwpf.model.io.HWPFFileSystem; +import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.sprm.SprmBuffer; - import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; @@ -223,7 +223,7 @@ public class PAPBinTable PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream); pfkp.fill(overflow); - byte[] bufFkp = pfkp.toByteArray(fcMin); + byte[] bufFkp = pfkp.toByteArray(tpt, fcMin); docStream.write(bufFkp); overflow = pfkp.getOverflow(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 965e328051..14c677db40 
100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -17,11 +17,11 @@ package org.apache.poi.hwpf.model; -import org.apache.poi.util.LittleEndian; - import java.util.ArrayList; -import java.util.List; import java.util.Arrays; +import java.util.List; + +import org.apache.poi.util.LittleEndian; /** * Represents a PAP FKP. The style properties for paragraph and character runs @@ -137,7 +137,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { * @param fcMin The file offset in the main stream where text begins. * @return A byte array representing this data structure. */ - protected byte[] toByteArray(int fcMin) + protected byte[] toByteArray(CharIndexTranslator translator, int fcMin) { byte[] buf = new byte[512]; int size = _papxList.size(); @@ -152,7 +152,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { int index = 0; for (; index < size; index++) { - byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl(); + byte[] grpprl = _papxList.get(index).getGrpprl(); int grpprlLength = grpprl.length; // is grpprl huge? 
@@ -255,7 +255,10 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2)); grpprlOffset -= (grpprlOffset % 2); } - LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin); + // LittleEndian.putInt( buf, fcOffset, + // papx.getStartBytes() + fcMin ); + LittleEndian.putInt( buf, fcOffset, + translator.getByteIndex( papx.getStart() ) ); buf[bxOffset] = (byte)(grpprlOffset/2); System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); @@ -283,7 +286,9 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { } - LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); + // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); + LittleEndian.putInt( buf, fcOffset, + translator.getByteIndex( papx.getEnd() ) ); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java index 1707ccd71a..4d9a315982 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java @@ -123,6 +123,31 @@ public final class PAPX extends BytePropertyNode { return (SprmBuffer)_buf; } + /** + * @deprecated Though bytes are actually stored in the file, it is advised to + * use char positions for all operations, including save + * operations, because only char positions are preserved. + */ + @Deprecated + @Override + public int getEndBytes() + { + return super.getEndBytes(); + } + + /** + * @deprecated Though bytes are actually stored in the file, it is advised to + * use char positions for all operations, including save + * operations, because only char positions are preserved. 
+ */ + @Deprecated + @Override + public int getStartBytes() + { + // Overridden only to mark the method as deprecated; delegates to super + return super.getStartBytes(); + } + public ParagraphProperties getParagraphProperties(StyleSheet ss) { if(ss == null) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 8607b2109f..c6f9cb2457 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -179,14 +179,22 @@ public class SectionTable // add the section descriptor bytes to the PlexOfCps. - - // original line - - //GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray()); - - // Line using Ryan's FCtoCP() conversion method - - // unable to observe any effect on our testcases when using this code - piers - GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray()); - + /* original line */ + // GenericPropertyNode property = new + // GenericPropertyNode(sepx.getStart(), sepx.getEnd(), + // sed.toByteArray()); + /* + * Line using Ryan's FCtoCP() conversion method - unable to observe + * any effect on our testcases when using this code - piers + */ + /* + * there is an effect on Bug45473.doc actually. 
writeoutreadback + * changes byte offset of chars (but preserve string offsets) - + * sergey + */ + GenericPropertyNode property = new GenericPropertyNode( + tpt.getCharIndex( sepx.getStartBytes() ), + tpt.getCharIndex( sepx.getEndBytes() ), sed.toByteArray() ); plex.addProperty(property); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index b4b02b5024..c8e3db2f83 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -17,15 +17,15 @@ package org.apache.poi.hwpf.model; -import org.apache.poi.hwpf.model.io.HWPFOutputStream; -import org.apache.poi.poifs.common.POIFSConstants; - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; +import org.apache.poi.hwpf.model.io.HWPFOutputStream; +import org.apache.poi.poifs.common.POIFSConstants; + /** * The piece table for matching up character positions to bits of text. This * mostly works in bytes, but the TextPieces themselves work in characters. This @@ -197,6 +197,33 @@ public class TextPieceTable implements CharIndexTranslator { return false; } + public int getByteIndex( int charPos ) + { + int byteCount = 0; + for ( TextPiece tp : _textPieces ) + { + if ( charPos >= tp.getEnd() ) + { + byteCount = tp.getPieceDescriptor().getFilePosition() + + ( tp.getEnd() - tp.getStart() ) + * ( tp.isUnicode() ? 2 : 1 ); + + if ( charPos == tp.getEnd() ) + break; + + continue; + } + if ( charPos < tp.getEnd() ) + { + int left = charPos - tp.getStart(); + byteCount = tp.getPieceDescriptor().getFilePosition() + left + * ( tp.isUnicode() ? 
2 : 1 ); + break; + } + } + return byteCount; + } + public int getCharIndex(int bytePos) { return getCharIndex(bytePos, 0); } @@ -297,7 +324,7 @@ public class TextPieceTable implements CharIndexTranslator { for(TextPiece tp : _textPiecesFCOrder) { int pieceStart = tp.getPieceDescriptor().getFilePosition(); - if (startBytePos > pieceStart + tp.bytesLength()) { + if (startBytePos >= pieceStart + tp.bytesLength()) { continue; }