From: Sergey Vladimirov Date: Mon, 11 Jul 2011 18:36:37 +0000 (+0000) Subject: SEPX uses chars as boundaries coordinates, don't do double conversions X-Git-Tag: REL_3_8_BETA4~215 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=b7c8272d7ac0bf44f0b33c24f8eaf36a274cb644;p=poi.git SEPX uses chars as boundaries coordinates, don't do double conversions git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145276 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java index 348cb03504..7901f08326 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java @@ -20,8 +20,6 @@ package org.apache.poi.hwpf.model; import java.util.Collections; import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; /** * This class holds all of the section formatting @@ -33,15 +31,20 @@ import org.apache.poi.util.POILogger; */ public final class OldSectionTable extends SectionTable { - private static final POILogger logger = POILogFactory - .getLogger( OldSectionTable.class ); - - public OldSectionTable(byte[] documentStream, int offset, - int size, int fcMin, - TextPieceTable tpt) - { - PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12); - CharIsBytes charConv = new CharIsBytes(tpt); + /** + * @deprecated Use {@link #OldSectionTable(byte[],int,int)} instead + */ + @Deprecated + @SuppressWarnings( "unused" ) + public OldSectionTable( byte[] documentStream, int offset, int size, + int fcMin, TextPieceTable tpt ) + { + this( documentStream, offset, size ); + } + + public OldSectionTable( byte[] documentStream, int offset, int size ) + { + PlexOfCps sedPlex = new PlexOfCps( documentStream, offset, size, 12 ); int length = sedPlex.length(); @@ -58,7 +61,7 @@ public final class OldSectionTable extends SectionTable // check for the optimization if (fileOffset == 0xffffffff) { - sepx = new SEPX(sed, startAt, endAt, charConv, new byte[0]); + sepx = new SEPX(sed, startAt, endAt, new byte[0]); } else { @@ -71,45 +74,11 @@ public final class OldSectionTable extends SectionTable byte[] buf = new byte[sepxSize+2]; fileOffset += LittleEndian.SHORT_SIZE; System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); - sepx = new SEPX(sed, startAt, endAt, charConv, buf); + sepx = new SEPX(sed, startAt, endAt, buf); } - /* - * section descriptor in old Word files seems to refer to char - * indexes, not bytes positions. Check Word6.doc for example. - - * sergey - */ _sections.add( sepx ); } Collections.sort( _sections, PropertyNode.StartComparator.instance ); } - - private static class CharIsBytes implements CharIndexTranslator { - private TextPieceTable tpt; - private CharIsBytes(TextPieceTable tpt) { - this.tpt = tpt; - } - - public int getByteIndex( int charPos ) - { - return charPos; - } - - public int getCharIndex(int bytePos, int startCP) { - return bytePos; - } - public int getCharIndex(int bytePos) { - return bytePos; - } - - public boolean isIndexInTable(int bytePos) { - return tpt.isIndexInTable(bytePos); - } - public int lookIndexBackward(int bytePos) { - return tpt.lookIndexBackward(bytePos); - } - public int lookIndexForward(int bytePos) { - return tpt.lookIndexForward(bytePos); - } - } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java index 60e7d70af5..ab326825a1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java @@ -22,17 +22,16 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor; import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.usermodel.SectionProperties; -public final class SEPX extends BytePropertyNode +public final class SEPX extends PropertyNode { SectionProperties sectionProperties; SectionDescriptor _sed; - public SEPX( SectionDescriptor sed, int start, int end, - CharIndexTranslator translator, byte[] grpprl ) + public SEPX( SectionDescriptor sed, int start, int end, byte[] grpprl ) { - super( start, end, translator, new SprmBuffer( grpprl ) ); + super( start, end, new SprmBuffer( grpprl ) ); _sed = sed; } @@ -75,7 +74,6 @@ public final class SEPX extends BytePropertyNode public String toString() { - return "SEPX from " + getStart() + " to " + getEnd() + " (in bytes " - + getStartBytes() + " to " + getEndBytes() + ")"; + return "SEPX from " + getStart() + " to " + getEnd(); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionDescriptor.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionDescriptor.java index d4a5ef238d..7f17dfc750 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionDescriptor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionDescriptor.java @@ -19,13 +19,36 @@ package org.apache.poi.hwpf.model; import org.apache.poi.util.LittleEndian; +/** + * Section Descriptor (SED) + * + * @see page 186 for details + */ public final class SectionDescriptor { - private short fn; - private int fc; - private short fnMpr; - private int fcMpr; + /** + * "Used internally by Word" + */ + private short fn; + + /** + * "File offset in main stream to beginning of SEPX stored for section. If + * sed.fcSepx==0xFFFFFFFF, the section properties for the section are equal + * to the standard SEP (see SEP definition)." + */ + private int fcSepx; + + /** + * "Used internally by Word" + */ + private short fnMpr; + + /** + * "Points to offset in FC space of main stream where the Macintosh Print + * Record for a document created on a Macintosh will be stored" + */ + private int fcMpr; public SectionDescriptor() { @@ -35,7 +58,7 @@ public final class SectionDescriptor { fn = LittleEndian.getShort(buf, offset); offset += LittleEndian.SHORT_SIZE; - fc = LittleEndian.getInt(buf, offset); + fcSepx = LittleEndian.getInt(buf, offset); offset += LittleEndian.INT_SIZE; fnMpr = LittleEndian.getShort(buf, offset); offset += LittleEndian.SHORT_SIZE; @@ -44,12 +67,12 @@ public final class SectionDescriptor public int getFc() { - return fc; + return fcSepx; } public void setFc(int fc) { - this.fc = fc; + this.fcSepx = fc; } public boolean equals(Object o) @@ -65,7 +88,7 @@ public final class SectionDescriptor LittleEndian.putShort(buf, offset, fn); offset += LittleEndian.SHORT_SIZE; - LittleEndian.putInt(buf, offset, fc); + LittleEndian.putInt(buf, offset, fcSepx); offset += LittleEndian.INT_SIZE; LittleEndian.putShort(buf, offset, fnMpr); offset += LittleEndian.SHORT_SIZE; @@ -73,4 +96,11 @@ public final class SectionDescriptor return buf; } + + @Override + public String toString() + { + return "[SED] (fn: " + fn + "; fcSepx: " + fcSepx + "; fnMpr: " + fnMpr + + "; fcMpr: " + fcMpr + ")"; + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 5f1f08a7f9..b2cbc42199 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -62,14 +62,16 @@ public class SectionTable GenericPropertyNode node = sedPlex.getProperty(x); SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0); - int fileOffset = sed.getFc(); - int startAt = CPtoFC(node.getStart()); - int endAt = CPtoFC(node.getEnd()); + int fileOffset = sed.getFc(); + // int startAt = CPtoFC(node.getStart()); + // int endAt = CPtoFC(node.getEnd()); + int startAt = node.getStart(); + int endAt = node.getEnd(); // check for the optimization if (fileOffset == 0xffffffff) { - _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0])); + _sections.add(new SEPX(sed, startAt, endAt, new byte[0])); } else { @@ -78,7 +80,7 @@ public class SectionTable byte[] buf = new byte[sepxSize]; fileOffset += LittleEndian.SHORT_SIZE; System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); - _sections.add(new SEPX(sed, startAt, endAt, tpt, buf)); + _sections.add(new SEPX(sed, startAt, endAt, buf)); } } @@ -92,7 +94,7 @@ public class SectionTable SEPX s = _sections.get(i); if(s.getEnd() == mainEndsAt) { matchAt = true; - } else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) { + } else if(s.getEnd() == mainEndsAt || s.getEnd() == mainEndsAt-1) { matchHalf = true; } } @@ -102,8 +104,12 @@ public class SectionTable SEPX s = _sections.get(i); GenericPropertyNode node = sedPlex.getProperty(i); - s.setStart( CPtoFC(node.getStart()) ); - s.setEnd( CPtoFC(node.getEnd()) ); + // s.setStart( CPtoFC(node.getStart()) ); + // s.setEnd( CPtoFC(node.getEnd()) ); + int startAt = node.getStart(); + int endAt = node.getEnd(); + s.setStart( startAt ); + s.setEnd( endAt ); } } @@ -130,24 +136,27 @@ public class SectionTable // normal use, but this version works with our non-contiguous test case. // So far unable to get this test case to be written out as well due to // other issues. - piers - private int CPtoFC(int CP) - { - TextPiece TP = null; - - for(int i=_text.size()-1; i>-1; i--) - { - TP = _text.get(i); - - if(CP >= TP.getCP()) break; - } - int FC = TP.getPieceDescriptor().getFilePosition(); - int offset = CP - TP.getCP(); - if (TP.isUnicode()) { - offset = offset*2; - } - FC = FC+offset; - return FC; - } + // + // i'm commenting this out, because it just doesn't work with non-contiguous + // textpieces :( Usual (as for PAPX and CHPX) call to TextPiecesTable does. + // private int CPtoFC(int CP) + // { + // TextPiece TP = null; + // + // for(int i=_text.size()-1; i>-1; i--) + // { + // TP = _text.get(i); + // + // if(CP >= TP.getCP()) break; + // } + // int FC = TP.getPieceDescriptor().getFilePosition(); + // int offset = CP - TP.getCP(); + // if (TP.isUnicode()) { + // offset = offset*2; + // } + // FC = FC+offset; + // return FC; + // } public ArrayList getSections() {