From 145e221f877ebbde9e9f291702dcc046b17063d8 Mon Sep 17 00:00:00 2001 From: Maxim Valyanskiy Date: Mon, 26 Jul 2010 14:04:27 +0000 Subject: More fixes for auto-saved documents git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979286 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hwpf/model/CHPFormattedDiskPage.java | 7 +- .../src/org/apache/poi/hwpf/model/CHPX.java | 4 +- .../apache/poi/hwpf/model/CharIndexTranslator.java | 29 +++-- .../org/apache/poi/hwpf/model/PicturesTable.java | 6 +- .../org/apache/poi/hwpf/model/TextPieceTable.java | 117 ++++++++++----------- .../src/org/apache/poi/hwpf/usermodel/Range.java | 8 +- .../org/apache/poi/hwpf/model/TestCHPBinTable.java | 7 +- .../apache/poi/hwpf/usermodel/TestPictures.java | 6 ++ test-data/document/o_kurs.doc | Bin 0 -> 202240 bytes 9 files changed, 111 insertions(+), 73 deletions(-) create mode 100644 test-data/document/o_kurs.doc diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java index 628fb75d14..10bb77d851 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java @@ -62,7 +62,12 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage { int startAt = getStart(x); int endAt = getEnd(x); - _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x))); + + if (!tpt.isIndexInTable(startAt) && !tpt.isIndexInTable(endAt)) { + _chpxList.add(null); + } else { + _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x))); + } } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java index f56621afad..b7c4db7e99 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java @@ -36,12 +36,12 @@ public final class CHPX extends BytePropertyNode public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl) { - super(fcStart, fcEnd, translator, new SprmBuffer(grpprl)); + super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl)); } public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf) { - super(fcStart, fcEnd, translator ,buf); + super(fcStart, translator.lookIndexBackward(fcEnd), translator ,buf); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java index d2cc0ebb4d..cda2fb26a5 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java @@ -18,9 +18,9 @@ package org.apache.poi.hwpf.model; public interface CharIndexTranslator { - /** * Calculates the char index of the given byte index. + * Look forward if index is not in table * * @param bytePos The character offset to check * @return the char index @@ -28,13 +28,28 @@ public interface CharIndexTranslator { int getCharIndex(int bytePos); /** - * Is the text at the given byte offset unicode, or plain old ascii? In a - * very evil fashion, you have to actually know this to make sense of - * character and paragraph properties :( + * Check if index is in table + * + * @param bytePos + * @return true if index in table, false if not + */ + + boolean isIndexInTable(int bytePos); + + /** + * Return first index >= bytePos that is in table + * + * @param bytePos + * @return + */ + public int lookIndexForward(int bytePos); + + /** + * Return last index <= bytePos that is in table * - * @param bytePos The character offset to check about - * @return true if the text at the given byte offset is unicode + * @param bytePos + * @return */ - boolean isUnicodeAtByteOffset(int bytePos); + public int lookIndexBackward(int bytePos); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java index 64046eadcb..1ffcaaa6c3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java @@ -196,7 +196,11 @@ public final class PicturesTable Range range = _document.getOverallRange(); for (int i = 0; i < range.numCharacterRuns(); i++) { CharacterRun run = range.getCharacterRun(i); - String text = run.text(); + + if (run==null) { + continue; + } + Picture picture = extractPicture(run, false); if (picture != null) { pictures.add(picture); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index 11a5b372ee..cfff0b2937 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -33,7 +33,7 @@ import java.util.List; * * @author Ryan Ackley */ -public final class TextPieceTable implements CharIndexTranslator { +public class TextPieceTable implements CharIndexTranslator { protected ArrayList _textPieces = new ArrayList(); protected ArrayList _textPiecesFCOrder = new ArrayList(); // int _multiple; @@ -118,51 +118,6 @@ public final class TextPieceTable implements CharIndexTranslator { Collections.sort(_textPiecesFCOrder, new FCComparator()); } - /** - * Is the text at the given Character offset unicode, or plain old ascii? In - * a very evil fashion, you have to actually know this to make sense of - * character and paragraph properties :( - * - * @param cp - * The character offset to check about - */ - public boolean isUnicodeAtCharOffset(int cp) { - boolean lastWas = false; - - for(TextPiece tp : _textPieces) { - // If the text piece covers the character, all good - if (tp.getStart() <= cp && tp.getEnd() >= cp) { - return tp.isUnicode(); - } - // Otherwise keep track for the last one - lastWas = tp.isUnicode(); - } - - // If they ask off the end, just go with the last one... - return lastWas; - } - - public boolean isUnicodeAtByteOffset(int bytePos) { - boolean lastWas = false; - - for(TextPiece tp : _textPieces) { - int curByte = tp.getPieceDescriptor().getFilePosition(); - int pieceEnd = curByte + tp.bytesLength(); - - // If the text piece covers the character, all good - if (curByte <= bytePos && pieceEnd > bytePos) { - return tp.isUnicode(); - } - // Otherwise keep track for the last one - lastWas = tp.isUnicode(); - // Move along - curByte = pieceEnd; - } - - // If they ask off the end, just go with the last one... - return lastWas; - } - public byte[] writeTo(HWPFOutputStream docStream) throws IOException { PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes()); @@ -245,19 +200,7 @@ public final class TextPieceTable implements CharIndexTranslator { public int getCharIndex(int bytePos) { int charCount = 0; - for(TextPiece tp : _textPiecesFCOrder) { - int pieceStart = tp.getPieceDescriptor().getFilePosition(); - - if (bytePos > pieceStart + tp.bytesLength()) { - continue; - } - - if (pieceStart > bytePos) { - bytePos = pieceStart; - } - - break; - } + bytePos = lookIndexForward(bytePos); for(TextPiece tp : _textPieces) { int pieceStart = tp.getPieceDescriptor().getFilePosition(); @@ -287,6 +230,62 @@ public final class TextPieceTable implements CharIndexTranslator { return charCount; } + public int lookIndexForward(int bytePos) { + for(TextPiece tp : _textPiecesFCOrder) { + int pieceStart = tp.getPieceDescriptor().getFilePosition(); + + if (bytePos > pieceStart + tp.bytesLength()) { + continue; + } + + if (pieceStart > bytePos) { + bytePos = pieceStart; + } + + break; + } + return bytePos; + } + + public int lookIndexBackward(int bytePos) { + int lastEnd = 0; + + for(TextPiece tp : _textPiecesFCOrder) { + int pieceStart = tp.getPieceDescriptor().getFilePosition(); + + if (bytePos > pieceStart + tp.bytesLength()) { + lastEnd = pieceStart + tp.bytesLength(); + continue; + } + + if (pieceStart > bytePos) { + bytePos = lastEnd; + } + + break; + } + + return bytePos; + } + + public boolean isIndexInTable(int bytePos) { + for(TextPiece tp : _textPiecesFCOrder) { + int pieceStart = tp.getPieceDescriptor().getFilePosition(); + + if (bytePos > pieceStart + tp.bytesLength()) { + continue; + } + + if (pieceStart > bytePos) { + return false; + } + + return true; + } + + return false; + } + private static class FCComparator implements Comparator { public int compare(TextPiece textPiece, TextPiece textPiece1) { if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 4d78ca9ba3..7c9b541d7c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -784,6 +784,10 @@ public class Range { // TODO -instantiable superclass public CharacterRun getCharacterRun(int index) { initCharacterRuns(); CHPX chpx = _characters.get(index + _charStart); + + if (chpx == null) { + return null; + } int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx .getEnd()); @@ -963,7 +967,7 @@ public class Range { // TODO -instantiable superclass int x = min; PropertyNode node = (PropertyNode) rpl.get(x); - while (node.getEnd() <= start && x < rpl.size() - 1) { + while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) { x++; node = (PropertyNode) rpl.get(x); } @@ -978,7 +982,7 @@ public class Range { // TODO -instantiable superclass int y = x; node = (PropertyNode) rpl.get(y); - while (node.getEnd() < end && y < rpl.size() - 1) { + while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) { y++; node = (PropertyNode) rpl.get(y); } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java index 4ad028aac4..7179beba68 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java @@ -31,7 +31,12 @@ public final class TestCHPBinTable private CHPBinTable _cHPBinTable = null; private HWPFDocFixture _hWPFDocFixture; - private TextPieceTable fakeTPT = new TextPieceTable(); + private TextPieceTable fakeTPT = new TextPieceTable() { + @Override + public boolean isIndexInTable(int bytePos) { + return true; + } + }; public void testReadWrite() throws Exception diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java index 05013b07fb..a5c9c501e9 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java @@ -157,4 +157,10 @@ public final class TestPictures extends TestCase { doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception } + public void testFastSaved2() { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("o_kurs.doc"); + + doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception + } + } diff --git a/test-data/document/o_kurs.doc b/test-data/document/o_kurs.doc new file mode 100644 index 0000000000..caab02ae9c Binary files /dev/null and b/test-data/document/o_kurs.doc differ -- cgit v1.2.3