about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java | 7
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java | 4
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java | 29
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java | 6
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java | 117
-rw-r--r-- src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java | 8
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java | 7
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java | 6
-rw-r--r-- test-data/document/o_kurs.doc | bin 0 -> 202240 bytes
9 files changed, 111 insertions, 73 deletions
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
index 628fb75d14..10bb77d851 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java
@@ -62,7 +62,12 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage
{
int startAt = getStart(x);
int endAt = getEnd(x);
- _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+
+ if (!tpt.isIndexInTable(startAt) && !tpt.isIndexInTable(endAt)) {
+ _chpxList.add(null);
+ } else {
+ _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x)));
+ }
}
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
index f56621afad..b7c4db7e99 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java
@@ -36,12 +36,12 @@ public final class CHPX extends BytePropertyNode
public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
{
- super(fcStart, fcEnd, translator, new SprmBuffer(grpprl));
+ super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl));
}
public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
{
- super(fcStart, fcEnd, translator ,buf);
+ super(fcStart, translator.lookIndexBackward(fcEnd), translator ,buf);
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
index d2cc0ebb4d..cda2fb26a5 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
@@ -18,9 +18,9 @@
package org.apache.poi.hwpf.model;
public interface CharIndexTranslator {
-
/**
* Calculates the char index of the given byte index.
+ * Looks forward to the next index that is in the table if the given index is not in the table.
*
* @param bytePos The character offset to check
* @return the char index
@@ -28,13 +28,28 @@ public interface CharIndexTranslator {
int getCharIndex(int bytePos);
/**
- * Is the text at the given byte offset unicode, or plain old ascii? In a
- * very evil fashion, you have to actually know this to make sense of
- * character and paragraph properties :(
+ * Checks whether the given byte index is in the table.
+ *
+ * @param bytePos the byte index to check
+ * @return true if the index is in the table, false if not
+ */
+
+ boolean isIndexInTable(int bytePos);
+
+ /**
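+ * Returns the first index &gt;= bytePos that is in the table.
+ *
+ * @param bytePos the byte index to start looking from
+ * @return the first index &gt;= bytePos that is in the table
+ */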
+ public int lookIndexForward(int bytePos);
+
+ /**
+ * Returns the last index &lt;= bytePos that is in the table.
*
- * @param bytePos The character offset to check about
- * @return true if the text at the given byte offset is unicode
+ * @param bytePos the byte index to start looking from
+ * @return the last index &lt;= bytePos that is in the table
*/
- boolean isUnicodeAtByteOffset(int bytePos);
+ public int lookIndexBackward(int bytePos);
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
index 64046eadcb..1ffcaaa6c3 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
@@ -196,7 +196,11 @@ public final class PicturesTable
Range range = _document.getOverallRange();
for (int i = 0; i < range.numCharacterRuns(); i++) {
CharacterRun run = range.getCharacterRun(i);
- String text = run.text();
+
+ if (run==null) {
+ continue;
+ }
+
Picture picture = extractPicture(run, false);
if (picture != null) {
pictures.add(picture);
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
index 11a5b372ee..cfff0b2937 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -33,7 +33,7 @@ import java.util.List;
*
* @author Ryan Ackley
*/
-public final class TextPieceTable implements CharIndexTranslator {
+public class TextPieceTable implements CharIndexTranslator {
protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
// int _multiple;
@@ -118,51 +118,6 @@ public final class TextPieceTable implements CharIndexTranslator {
Collections.sort(_textPiecesFCOrder, new FCComparator());
}
- /**
- * Is the text at the given Character offset unicode, or plain old ascii? In
- * a very evil fashion, you have to actually know this to make sense of
- * character and paragraph properties :(
- *
- * @param cp
- * The character offset to check about
- */
- public boolean isUnicodeAtCharOffset(int cp) {
- boolean lastWas = false;
-
- for(TextPiece tp : _textPieces) {
- // If the text piece covers the character, all good
- if (tp.getStart() <= cp && tp.getEnd() >= cp) {
- return tp.isUnicode();
- }
- // Otherwise keep track for the last one
- lastWas = tp.isUnicode();
- }
-
- // If they ask off the end, just go with the last one...
- return lastWas;
- }
-
- public boolean isUnicodeAtByteOffset(int bytePos) {
- boolean lastWas = false;
-
- for(TextPiece tp : _textPieces) {
- int curByte = tp.getPieceDescriptor().getFilePosition();
- int pieceEnd = curByte + tp.bytesLength();
-
- // If the text piece covers the character, all good
- if (curByte <= bytePos && pieceEnd > bytePos) {
- return tp.isUnicode();
- }
- // Otherwise keep track for the last one
- lastWas = tp.isUnicode();
- // Move along
- curByte = pieceEnd;
- }
-
- // If they ask off the end, just go with the last one...
- return lastWas;
- }
-
public byte[] writeTo(HWPFOutputStream docStream) throws IOException {
PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
@@ -245,19 +200,7 @@ public final class TextPieceTable implements CharIndexTranslator {
public int getCharIndex(int bytePos) {
int charCount = 0;
- for(TextPiece tp : _textPiecesFCOrder) {
- int pieceStart = tp.getPieceDescriptor().getFilePosition();
-
- if (bytePos > pieceStart + tp.bytesLength()) {
- continue;
- }
-
- if (pieceStart > bytePos) {
- bytePos = pieceStart;
- }
-
- break;
- }
+ bytePos = lookIndexForward(bytePos);
for(TextPiece tp : _textPieces) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
@@ -287,6 +230,62 @@ public final class TextPieceTable implements CharIndexTranslator {
return charCount;
}
+ public int lookIndexForward(int bytePos) { // Snaps bytePos forward onto a text piece: returns it unchanged when it is not before the deciding piece, otherwise that piece's start.
+ for(TextPiece tp : _textPiecesFCOrder) { // walks the pieces in the order of the _textPiecesFCOrder list
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+ if (bytePos > pieceStart + tp.bytesLength()) { // bytePos lies beyond the end of this piece, so try the next one
+ continue;
+ }
+
+ if (pieceStart > bytePos) { // bytePos lies in the gap before this piece, so move it to the piece start
+ bytePos = pieceStart;
+ }
+
+ break; // the first piece that does not end before bytePos decides the result
+ }
+ return bytePos; // also returned unchanged when bytePos is beyond the end of every piece
+ }
+
+ public int lookIndexBackward(int bytePos) { // Snaps bytePos backward: returns it unchanged when it is not before the deciding piece, otherwise the end of the last piece skipped.
+ int lastEnd = 0; // end offset of the most recently skipped piece; stays 0 if no piece was skipped
+
+ for(TextPiece tp : _textPiecesFCOrder) { // walks the pieces in the order of the _textPiecesFCOrder list
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+ if (bytePos > pieceStart + tp.bytesLength()) { // bytePos lies beyond the end of this piece
+ lastEnd = pieceStart + tp.bytesLength(); // remember where this piece ended in case bytePos falls in the following gap
+ continue;
+ }
+
+ if (pieceStart > bytePos) { // bytePos lies in the gap before this piece, so fall back to the previous piece end
+ bytePos = lastEnd;
+ }
+
+ break; // the first piece that does not end before bytePos decides the result
+ }
+
+ return bytePos; // NOTE(review): when bytePos is beyond the end of every piece it is returned unchanged, not clamped to lastEnd - confirm this is intended
+ }
+
+ public boolean isIndexInTable(int bytePos) { // Reports whether bytePos falls within [pieceStart, pieceStart + bytesLength()] of some text piece.
+ for(TextPiece tp : _textPiecesFCOrder) { // walks the pieces in the order of the _textPiecesFCOrder list
+ int pieceStart = tp.getPieceDescriptor().getFilePosition();
+
+ if (bytePos > pieceStart + tp.bytesLength()) { // bytePos lies beyond the end of this piece, so try the next one
+ continue;
+ }
+
+ if (pieceStart > bytePos) { // bytePos lies in the gap before this piece
+ return false;
+ }
+
+ return true; // pieceStart <= bytePos <= pieceStart + bytesLength(); the end offset itself counts as inside
+ }
+
+ return false; // bytePos is beyond the end of every piece, or there are no pieces
+ }
+
private static class FCComparator implements Comparator<TextPiece> {
public int compare(TextPiece textPiece, TextPiece textPiece1) {
if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
index 4d78ca9ba3..7c9b541d7c 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java
@@ -784,6 +784,10 @@ public class Range { // TODO -instantiable superclass
public CharacterRun getCharacterRun(int index) {
initCharacterRuns();
CHPX chpx = _characters.get(index + _charStart);
+
+ if (chpx == null) {
+ return null;
+ }
int[] point = findRange(_paragraphs, _parStart, Math.max(chpx.getStart(), _start), chpx
.getEnd());
@@ -963,7 +967,7 @@ public class Range { // TODO -instantiable superclass
int x = min;
PropertyNode node = (PropertyNode) rpl.get(x);
- while (node.getEnd() <= start && x < rpl.size() - 1) {
+ while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) {
x++;
node = (PropertyNode) rpl.get(x);
}
@@ -978,7 +982,7 @@ public class Range { // TODO -instantiable superclass
int y = x;
node = (PropertyNode) rpl.get(y);
- while (node.getEnd() < end && y < rpl.size() - 1) {
+ while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
y++;
node = (PropertyNode) rpl.get(y);
}
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
index 4ad028aac4..7179beba68 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java
@@ -31,7 +31,12 @@ public final class TestCHPBinTable
private CHPBinTable _cHPBinTable = null;
private HWPFDocFixture _hWPFDocFixture;
- private TextPieceTable fakeTPT = new TextPieceTable();
+ private TextPieceTable fakeTPT = new TextPieceTable() { // test stub: an anonymous TextPieceTable subclass that reports every byte index as being in the table
+ @Override
+ public boolean isIndexInTable(int bytePos) {
+ return true; // unconditional, regardless of bytePos
+ }
+ };
public void testReadWrite()
throws Exception
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
index 05013b07fb..a5c9c501e9 100644
--- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestPictures.java
@@ -157,4 +157,10 @@ public final class TestPictures extends TestCase {
doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
}
+ public void testFastSaved2() { // smoke test: opens the o_kurs.doc sample and extracts all pictures; there are no assertions
+ HWPFDocument doc = HWPFTestDataSamples.openSampleFile("o_kurs.doc");
+
+ doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
+ }
+
}
diff --git a/test-data/document/o_kurs.doc b/test-data/document/o_kurs.doc
new file mode 100644
index 0000000000..caab02ae9c
--- /dev/null
+++ b/test-data/document/o_kurs.doc
Binary files differ