From: Nick Burch Date: Mon, 11 Aug 2008 21:25:17 +0000 (+0000) Subject: CHPXs and PAPXs are apparently cp based, but are really byte based! Work around this X-Git-Tag: REL_3_2_FINAL~169 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=36821ec4634f5a8e638c22b2039f00d076fb4df3;p=poi.git CHPXs and PAPXs are apparently cp based, but are really byte based! Work around this git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684939 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index f06786b1fc..ab32cb05a1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -219,24 +219,25 @@ public class HWPFDocument extends POIDocument _dataStream = new byte[0]; } - // get the start of text in the main stream - int fcMin = _fib.getFcMin(); + // Get the cp of the start of text in the main stream + // The latest spec doc says this is always zero! + int fcMin = 0; + //fcMin = _fib.getFcMin() - // load up our standard structures. + // Start to load up our standard structures. _dop = new DocumentProperties(_tableStream, _fib.getFcDop()); _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); _tpt = _cft.getTextPieceTable(); - _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin); - _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin); - - // Word XP puts in a zero filled buffer in front of the text and it screws - // up my system for offsets. This is an adjustment. + + // Word XP and later all put in a zero filled buffer in + // front of the text. This screws up the system for offsets, + // which assume we always start at zero. This is an adjustment. int cpMin = _tpt.getCpMin(); - if (cpMin > 0) - { - _cbt.adjustForDelete(0, 0, cpMin); - _pbt.adjustForDelete(0, 0, cpMin); - } + + // Now load the rest of the properties, which need to be adjusted + // for where text really begin + _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), cpMin, _tpt); + _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), cpMin, _tpt); // Read FSPA and Escher information _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java new file mode 100644 index 0000000000..c1b5928012 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java @@ -0,0 +1,59 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.model; + +/** + * Normally PropertyNodes only ever work in characters, but + * a few cases actually store bytes, and this lets everything + * still work despite that. + * It handles the conversion as required between bytes + * and characters. + */ +public abstract class BytePropertyNode extends PropertyNode { + private boolean isUnicode; + + /** + * @param fcStart The start of the text for this property, in _bytes_ + * @param fcEnd The end of the text for this property, in _bytes_ + */ + public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) { + super( + generateCp(fcStart, isUnicode), + generateCp(fcEnd, isUnicode), + buf + ); + } + private static int generateCp(int val, boolean isUnicode) { + if(isUnicode) + return val/2; + return val; + } + + public boolean isUnicode() { + return isUnicode; + } + public int getStartBytes() { + if(isUnicode) + return getStart()*2; + return getStart(); + } + public int getEndBytes() { + if(isUnicode) + return getEnd()*2; + return getEnd(); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index 48c5a9d8b4..69e0a67d9e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -37,6 +37,8 @@ public class CHPBinTable /** List of character properties.*/ protected ArrayList _textRuns = new ArrayList(); + /** So we can know if things are unicode or not */ + private TextPieceTable tpt; public CHPBinTable() { @@ -52,9 +54,10 @@ public class CHPBinTable * @param fcMin */ public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, - int size, int fcMin) + int size, int fcMin, TextPieceTable tpt) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); + this.tpt = tpt; int length = binTable.length(); for (int x = 0; x < length; x++) @@ -65,7 +68,7 @@ public class CHPBinTable int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, - pageOffset, fcMin); + pageOffset, fcMin, tpt); int fkpSize = cfkp.size(); @@ -116,7 +119,14 @@ public class CHPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - CHPX insertChpx = new CHPX(cpStart, cpStart, buf); + boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart); + + CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode); + + // Ensure character offsets are really characters + insertChpx.setStart(cpStart); + insertChpx.setEnd(cpStart); + if (listIndex == _textRuns.size()) { _textRuns.add(insertChpx); @@ -126,7 +136,16 @@ public class CHPBinTable CHPX chpx = (CHPX)_textRuns.get(listIndex); if (chpx.getStart() < cpStart) { - CHPX clone = new CHPX(cpStart, chpx.getEnd(), chpx.getSprmBuf()); + // Copy the properties of the one before to afterwards + // Will go: + // Original, until insert at point + // New one + // Clone of original, on to the old end + CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode); 
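// A worked illustration (offsets hypothetical) of the split performed by the
// insert() methods above: when a new property is inserted at cpStart inside an
// existing run, the existing node is truncated to end at cpStart, the new
// zero-length node goes in at cpStart, and a clone carrying the original's
// sprm buffer covers the remainder up to the old end. All values here are
// character offsets (cp), never bytes.
//
//   before insert at cp 40:   [ original 0..100 ]
//   after insert at cp 40:    [ original 0..40 ][ new 40..40 ][ clone 40..100 ]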
+ // Again ensure contains character based offsets no matter what + clone.setStart(cpStart); + clone.setEnd(chpx.getEnd()); + chpx.setEnd(cpStart); _textRuns.add(listIndex + 1, insertChpx); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java index cd1a8c24f0..d5fb602b8e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java @@ -55,13 +55,14 @@ public class CHPFormattedDiskPage extends FormattedDiskPage * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array * read from a Word file). */ - public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) + public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt) { super(documentStream, offset); for (int x = 0; x < _crun; x++) { - _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x))); + boolean isUnicode = tpt.isUnicodeAt( getStart(x) ); + _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode)); } } @@ -157,7 +158,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage chpx = (CHPX)_chpxList.get(x); byte[] grpprl = chpx.getGrpprl(); - LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin); + LittleEndian.putInt(buf, fcOffset, chpx.getStartBytes() + fcMin); grpprlOffset -= (1 + grpprl.length); grpprlOffset -= (grpprlOffset % 2); buf[offsetOffset] = (byte)(grpprlOffset/2); @@ -168,7 +169,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage fcOffset += FC_SIZE; } // put the last chpx's end in - LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin); + LittleEndian.putInt(buf, fcOffset, chpx.getEndBytes() + fcMin); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java index 3e7b5b11fb..a89036c733 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java @@ -25,22 +25,26 @@ import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor; /** - * Comment me + * DANGER - works in bytes! + * + * Make sure you call getStart() / getEnd() when you want characters + * (normal use), but getStartByte() / getEndByte() when you're + * reading in / writing out! 
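// A minimal sketch (values hypothetical) of the byte/character conversion that
// BytePropertyNode.generateCp() applies above: unicode text pieces take two bytes
// per character, so a byte offset (fc) halves to a character offset (cp), while
// 8-bit text maps one to one; getStartBytes()/getEndBytes() reverse the mapping
// when the property is written back out.
int fc = 412;                            // hypothetical byte offset of the property
boolean isUnicode = true;                // taken from the covering text piece
int cp = isUnicode ? fc / 2 : fc;        // 206 when unicode, 412 otherwise
int fcOut = isUnicode ? cp * 2 : cp;     // what getStartBytes()/getEndBytes() produce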
* * @author Ryan Ackley */ -public class CHPX extends PropertyNode +public class CHPX extends BytePropertyNode { - public CHPX(int fcStart, int fcEnd, byte[] grpprl) + public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode) { - super(fcStart, fcEnd, new SprmBuffer(grpprl)); + super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode); } - public CHPX(int fcStart, int fcEnd, SprmBuffer buf) + public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode) { - super(fcStart, fcEnd, buf); + super(fcStart, fcEnd, buf, isUnicode); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 6f141d7612..cde563ec08 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -39,14 +39,18 @@ public class PAPBinTable protected ArrayList _paragraphs = new ArrayList(); byte[] _dataStream; + /** So we can know if things are unicode or not */ + private TextPieceTable tpt; + public PAPBinTable() { } public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset, - int size, int fcMin) + int size, int fcMin, TextPieceTable tpt) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); + this.tpt = tpt; int length = binTable.length(); for (int x = 0; x < length; x++) @@ -57,13 +61,14 @@ public class PAPBinTable int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, - dataStream, pageOffset, fcMin); + dataStream, pageOffset, fcMin, tpt); int fkpSize = pfkp.size(); for (int y = 0; y < fkpSize; y++) { - _paragraphs.add(pfkp.getPAPX(y)); + PAPX papx = pfkp.getPAPX(y); + _paragraphs.add(papx); } } _dataStream = dataStream; @@ -71,7 +76,14 @@ public class PAPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream); + boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart); + + PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode); + + // Ensure character offsets are really characters + forInsert.setStart(cpStart); + forInsert.setEnd(cpStart); + if (listIndex == _paragraphs.size()) { _paragraphs.add(forInsert); @@ -90,10 +102,21 @@ public class PAPBinTable { exc.printStackTrace(); } + + // Copy the properties of the one before to afterwards + // Will go: + // Original, until insert at point + // New one + // Clone of original, on to the old end + PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode); + // Again ensure contains character based offsets no matter what + clone.setStart(cpStart); + clone.setEnd(currentPap.getEnd()); + currentPap.setEnd(cpStart); - PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream); - _paragraphs.add(++listIndex, forInsert); - _paragraphs.add(++listIndex, splitPap); + + _paragraphs.add(listIndex + 1, forInsert); + _paragraphs.add(listIndex + 2, clone); } else { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 979825bf84..20f9b63b98 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -60,13 +60,14 @@ public class PAPFormattedDiskPage extends FormattedDiskPage /** * Creates a PAPFormattedDiskPage from a 512 byte array */ - public 
PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin) + public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt) { super(documentStream, offset); for (int x = 0; x < _crun; x++) { - _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream)); + boolean isUnicode = tpt.isUnicodeAt( getStart(x) ); + _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); } _fkp = null; _dataStream = dataStream; @@ -110,7 +111,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage } /** - * Gets the papx for the paragraph at index in this fkp. + * Gets the papx grpprl for the paragraph at index in this fkp. * * @param index The index of the papx to get. * @return a papx grpprl. @@ -259,7 +260,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2)); grpprlOffset -= (grpprlOffset % 2); } - LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin); + LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin); buf[bxOffset] = (byte)(grpprlOffset/2); System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); @@ -287,7 +288,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage } - LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin); + LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java index a7e259ec73..1e8ae86b52 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java @@ -29,29 +29,32 @@ import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmOperation; /** - * Comment me + * DANGER - works in bytes! + * + * Make sure you call getStart() / getEnd() when you want characters + * (normal use), but getStartByte() / getEndByte() when you're + * reading in / writing out! * * @author Ryan Ackley */ -public class PAPX extends PropertyNode -{ +public class PAPX extends BytePropertyNode { private ParagraphHeight _phe; private int _hugeGrpprlOffset = -1; - public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream) + public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode) { - super(fcStart, fcEnd, new SprmBuffer(papx)); + super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode); _phe = phe; SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream); if(buf != null) _buf = buf; } - public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream) + public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode) { - super(fcStart, fcEnd, buf); + super(fcStart, fcEnd, buf, isUnicode); _phe = new ParagraphHeight(); buf = findHuge(buf, dataStream); if(buf != null) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java index 42c5f5c27e..5ae16aa5e3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java @@ -22,7 +22,10 @@ import java.util.Arrays; /** * Represents a lightweight node in the Trees used to store content - * properties. Works only in characters. + * properties. 
+ * This only ever works in characters. For the few odd cases when + * the start and end aren't in characters (eg PAPX and CHPX), use + * {@link BytePropertyNode} between you and this. * * @author Ryan Ackley */ diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index 5e903ecb8a..7e856f1eeb 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -25,6 +25,7 @@ import org.apache.poi.poifs.common.POIFSConstants; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; /** @@ -62,8 +63,17 @@ public class TextPieceTable pieces[x] = new PieceDescriptor(node.getBytes(), 0); } - int firstPieceFilePosition = pieces[0].getFilePosition(); - _cpMin = firstPieceFilePosition - fcMin; + + // Figure out the cp of the earliest text piece + // Note that text pieces don't have to be stored in order! + _cpMin = pieces[0].getFilePosition() - fcMin; + for (int x = 0; x < pieces.length; x++) { + int start = pieces[x].getFilePosition() - fcMin; + if(start < _cpMin) { + _cpMin = start; + } + } + // using the PieceDescriptors, build our list of TextPieces. for (int x = 0; x < pieces.length; x++) @@ -104,6 +114,35 @@ public class TextPieceTable { return _textPieces; } + + /** + * Is the text at the given Character offset + * unicode, or plain old ascii? + * In a very evil fashion, you have to actually + * know this to make sense of character and + * paragraph properties :( + * @param cp The character offset to check about + */ + public boolean isUnicodeAt(int cp) { + boolean lastWas = false; + int lastAt = 0; + + Iterator it = _textPieces.iterator(); + while(it.hasNext()) { + TextPiece tp = (TextPiece)it.next(); + // If the text piece covers the character, all good + if(tp.getStart() <= cp && tp.getEnd() >= cp) { + return tp.isUnicode(); + } + // Otherwise keep track for the last one + if(tp.getStart() > lastAt) { + lastWas = tp.isUnicode(); + } + } + + // If they ask off the end, just go with the last one... + return lastWas; + } public byte[] writeTo(HWPFOutputStream docStream) throws IOException diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java b/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java index 6615823281..63961b4552 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/types/FIBAbstractType.java @@ -70,10 +70,10 @@ public abstract class FIBAbstractType private static BitField fFutureSavedUndo = BitFieldFactory.getInstance(0x0008); private static BitField fWord97Saved = BitFieldFactory.getInstance(0x0010); private static BitField fSpare0 = BitFieldFactory.getInstance(0x00FE); - protected int field_11_chs; - protected int field_12_chsTables; - protected int field_13_fcMin; - protected int field_14_fcMac; + protected int field_11_chs; /** Latest docs say this is Reserved3! */ + protected int field_12_chsTables; /** Latest docs say this is Reserved4! */ + protected int field_13_fcMin; /** Latest docs say this is Reserved5! */ + protected int field_14_fcMac; /** Latest docs say this is Reserved6! 
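// A small sketch (stream offsets hypothetical) of the _cpMin calculation added to
// TextPieceTable above: text pieces are not necessarily stored in file order, so
// the starting character position is the smallest (filePosition - fcMin) over all
// PieceDescriptors, rather than simply that of pieces[0].
int fcMin = 1024;                                // hypothetical start-of-text offset
int[] filePositions = { 2048, 1536, 4096 };      // hypothetical piece file positions
int cpMin = filePositions[0] - fcMin;
for (int x = 1; x < filePositions.length; x++) {
    int start = filePositions[x] - fcMin;
    if (start < cpMin) {
        cpMin = start;                           // ends up as 1536 - 1024 = 512
    }
}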
*/ public FIBAbstractType() diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java index 07e8bfbf91..d1f1451acd 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java @@ -32,6 +32,8 @@ public class TestCHPBinTable { private CHPBinTable _cHPBinTable = null; private HWPFDocFixture _hWPFDocFixture; + + private TextPieceTable fakeTPT = new TextPieceTable(); public TestCHPBinTable(String name) { @@ -46,7 +48,7 @@ public class TestCHPBinTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); - _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin); + _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fcMin, fakeTPT); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -57,7 +59,7 @@ public class TestCHPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0); + CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0, fakeTPT); ArrayList oldTextRuns = _cHPBinTable._textRuns; ArrayList newTextRuns = newBinTable._textRuns; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java index 4358cdef09..446b5232a5 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java @@ -32,6 +32,8 @@ public class TestPAPBinTable private PAPBinTable _pAPBinTable = null; private HWPFDocFixture _hWPFDocFixture; + private TextPieceTable fakeTPT = new TextPieceTable(); + public TestPAPBinTable(String name) { super(name); @@ -45,7 +47,7 @@ public class TestPAPBinTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); - _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin); + _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin, fakeTPT); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -56,7 +58,7 @@ public class TestPAPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0); + PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0, fakeTPT); ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs(); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java index 2994b6332d..7cbd75d6b8 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -18,23 +18,19 @@ package org.apache.poi.hwpf.usermodel; -import java.io.ByteArrayOutputStream; import java.io.FileInputStream; -import java.util.List; - -import org.apache.poi.hwpf.HWPFDocument; -import 
org.apache.poi.hwpf.model.PicturesTable; -import org.apache.poi.hwpf.usermodel.Picture; import junit.framework.TestCase; +import org.apache.poi.hwpf.HWPFDocument; + /** * Test to see if Range.delete() works even if the Range contains a * CharacterRun that uses Unicode characters. * * TODO - re-enable me when unicode paragraph stuff is fixed! */ -public abstract class TestRangeDelete extends TestCase { +public class TestRangeDelete extends TestCase { // u201c and u201d are "smart-quotes" private String originalText = diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java index 5f21508c9a..b4d7470387 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java @@ -18,23 +18,19 @@ package org.apache.poi.hwpf.usermodel; -import java.io.ByteArrayOutputStream; import java.io.FileInputStream; -import java.util.List; - -import org.apache.poi.hwpf.HWPFDocument; -import org.apache.poi.hwpf.model.PicturesTable; -import org.apache.poi.hwpf.usermodel.Picture; import junit.framework.TestCase; +import org.apache.poi.hwpf.HWPFDocument; + /** * Test to see if Range.insertBefore() works even if the Range contains a * CharacterRun that uses Unicode characters. * * TODO - re-enable me when unicode paragraph stuff is fixed! */ -public abstract class TestRangeInsertion extends TestCase { +public class TestRangeInsertion extends TestCase { // u201c and u201d are "smart-quotes" private String originalText = diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java index 1f0aad5aa5..f8a251b693 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java @@ -18,8 +18,10 @@ package org.apache.poi.hwpf.usermodel; import java.io.File; import java.io.FileInputStream; +import java.util.List; import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.PropertyNode; import junit.framework.TestCase; @@ -30,7 +32,7 @@ import junit.framework.TestCase; * * TODO - re-enable me when unicode paragraph stuff is fixed! 
*/ -public abstract class TestRangeProperties extends TestCase { +public class TestRangeProperties extends TestCase { private static final char page_break = (char)12; private static final String u_page_1 = @@ -142,8 +144,88 @@ public abstract class TestRangeProperties extends TestCase { assertEquals(22, c1.getFontSize()); assertEquals(32, c7.getFontSize()); } - + /** + * Tests the raw definitions of the paragraphs of + * a unicode document + */ + public void testUnicodeParagraphDefinitions() throws Exception { + Range r = u.getRange(); + String[] p1_parts = u_page_1.split("\r"); + String[] p2_parts = u_page_2.split("\r"); + + assertEquals( + u_page_1 + page_break + "\r" + u_page_2, + r.text() + ); + assertEquals( + 408, r.text().length() + ); + + List pDefs = r._paragraphs; + assertEquals(35, pDefs.size()); + + // Check that the last paragraph ends where it should do + assertEquals(531, u.getOverallRange().text().length()); + assertEquals(530, u.getCPSplitCalculator().getHeaderTextboxEnd()); + PropertyNode pLast = (PropertyNode)pDefs.get(34); +// assertEquals(530, pLast.getEnd()); + + // Only care about the first few really though + PropertyNode p0 = (PropertyNode)pDefs.get(0); + PropertyNode p1 = (PropertyNode)pDefs.get(1); + PropertyNode p2 = (PropertyNode)pDefs.get(2); + PropertyNode p3 = (PropertyNode)pDefs.get(3); + PropertyNode p4 = (PropertyNode)pDefs.get(4); + + // 5 paragraphs should get us to the end of our text + assertTrue(p0.getStart() < 408); + assertTrue(p0.getEnd() < 408); + assertTrue(p1.getStart() < 408); + assertTrue(p1.getEnd() < 408); + assertTrue(p2.getStart() < 408); + assertTrue(p2.getEnd() < 408); + assertTrue(p3.getStart() < 408); + assertTrue(p3.getEnd() < 408); + assertTrue(p4.getStart() < 408); + assertTrue(p4.getEnd() < 408); + + // Paragraphs should match with lines + assertEquals( + 0, + p0.getStart() + ); + assertEquals( + p1_parts[0].length() + 1, + p0.getEnd() + ); + + assertEquals( + p1_parts[0].length() + 1, + p1.getStart() + ); + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1, + p1.getEnd() + ); + + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1, + p2.getStart() + ); + assertEquals( + p1_parts[0].length() + 1 + + p1_parts[1].length() + 1 + + p1_parts[2].length() + 1, + p2.getEnd() + ); + } + + /** + * Tests the paragraph text of a unicode document + */ public void testUnicodeTextParagraphs() throws Exception { Range r = u.getRange(); assertEquals( @@ -154,14 +236,25 @@ public abstract class TestRangeProperties extends TestCase { ); assertEquals( - 5, + 12, r.numParagraphs() ); String[] p1_parts = u_page_1.split("\r"); String[] p2_parts = u_page_2.split("\r"); - System.out.println(r.getParagraph(2).text()); - // TODO + // Check text all matches up properly + assertEquals(p1_parts[0] + "\r", r.getParagraph(0).text()); + assertEquals(p1_parts[1] + "\r", r.getParagraph(1).text()); + assertEquals(p1_parts[2] + "\r", r.getParagraph(2).text()); + assertEquals(p1_parts[3] + "\r", r.getParagraph(3).text()); + assertEquals(p1_parts[4] + "\r", r.getParagraph(4).text()); + assertEquals(p1_parts[5] + "\r", r.getParagraph(5).text()); + assertEquals(p1_parts[6] + "\r", r.getParagraph(6).text()); + assertEquals(p1_parts[7] + "\r", r.getParagraph(7).text()); + assertEquals(p1_parts[8] + "\r", r.getParagraph(8).text()); + assertEquals(p1_parts[9] + "\r", r.getParagraph(9).text()); + assertEquals(page_break + "\r", r.getParagraph(10).text()); + assertEquals(p2_parts[0] + "\r", r.getParagraph(11).text()); } public void 
testUnicodeStyling() throws Exception { // TODO diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java index 67aea65be9..a342fdfd7b 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java @@ -18,23 +18,19 @@ package org.apache.poi.hwpf.usermodel; -import java.io.ByteArrayOutputStream; import java.io.FileInputStream; -import java.util.List; - -import org.apache.poi.hwpf.HWPFDocument; -import org.apache.poi.hwpf.model.PicturesTable; -import org.apache.poi.hwpf.usermodel.Picture; import junit.framework.TestCase; +import org.apache.poi.hwpf.HWPFDocument; + /** * Test to see if Range.replaceText() works even if the Range contains a * CharacterRun that uses Unicode characters. * * TODO - re-enable me when unicode paragraph stuff is fixed! */ -public abstract class TestRangeReplacement extends TestCase { +public class TestRangeReplacement extends TestCase { // u201c and u201d are "smart-quotes" private String originalText =