From f7c0cc0c42399f56cc18514c9f18a9d4f14f9adb Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Fri, 19 Jun 2009 13:45:55 +0000 Subject: [PATCH] improved HWPF to better handle unicode, patch provided by Benjamin Engele and Maxim Valyanskiy, see Bugzilla #46610 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@786505 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../poi/hwpf/model/BytePropertyNode.java | 29 +++---- .../apache/poi/hwpf/model/CHPBinTable.java | 5 +- .../poi/hwpf/model/CHPFormattedDiskPage.java | 5 +- .../src/org/apache/poi/hwpf/model/CHPX.java | 8 +- .../poi/hwpf/model/CharIndexTranslator.java | 40 ++++++++++ .../apache/poi/hwpf/model/PAPBinTable.java | 5 +- .../poi/hwpf/model/PAPFormattedDiskPage.java | 10 +-- .../src/org/apache/poi/hwpf/model/PAPX.java | 8 +- .../src/org/apache/poi/hwpf/model/SEPX.java | 4 +- .../apache/poi/hwpf/model/SectionTable.java | 37 ++------- .../apache/poi/hwpf/model/TextPieceTable.java | 50 ++++++++---- .../org/apache/poi/hwpf/data/Bug46610_1.doc | Bin 0 -> 27136 bytes .../org/apache/poi/hwpf/data/Bug46610_2.doc | Bin 0 -> 28672 bytes .../org/apache/poi/hwpf/data/Bug46610_3.doc | Bin 0 -> 53760 bytes .../poi/hwpf/usermodel/TestBug46610.java | 72 ++++++++++++++++++ 16 files changed, 187 insertions(+), 87 deletions(-) create mode 100755 src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java create mode 100755 src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc create mode 100755 src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc create mode 100755 src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_3.doc create mode 100755 src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 764cb7375d..bf8b7bd95b 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ 
-33,6 +33,7 @@ + 46610 - Improved HWPF to better handle unicode 47261 - Fixed SlideShow#removeSlide to remove references to Notes 47375 - Fixed HSSFHyperlink to correctly set inter-sheet and file links 47384 - Fixed ExternalNameRecord to handle unicode names diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java index d4c0b1fb7a..1753fdbd91 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java @@ -25,37 +25,28 @@ package org.apache.poi.hwpf.model; * and characters. */ public abstract class BytePropertyNode extends PropertyNode { - private boolean isUnicode; + private final int startBytes; + private final int endBytes; /** * @param fcStart The start of the text for this property, in _bytes_ * @param fcEnd The end of the text for this property, in _bytes_ */ - public BytePropertyNode(int fcStart, int fcEnd, Object buf, boolean isUnicode) { + public BytePropertyNode(int fcStart, int fcEnd, CharIndexTranslator translator, Object buf) { super( - generateCp(fcStart, isUnicode), - generateCp(fcEnd, isUnicode), + translator.getCharIndex(fcStart), + translator.getCharIndex(fcEnd), buf ); - this.isUnicode = isUnicode; - } - private static int generateCp(int val, boolean isUnicode) { - if(isUnicode) - return val/2; - return val; + this.startBytes = fcStart; + this.endBytes = fcEnd; } - public boolean isUnicode() { - return isUnicode; - } public int getStartBytes() { - if(isUnicode) - return getStart()*2; - return getStart(); + return startBytes; } + public int getEndBytes() { - if(isUnicode) - return getEnd()*2; - return getEnd(); + return endBytes; } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index 1374bd67d5..d8b51036b9 100644 --- 
a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -119,9 +119,8 @@ public final class CHPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart); - CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode); + CHPX insertChpx = new CHPX(0, 0, tpt,buf); // Ensure character offsets are really characters insertChpx.setStart(cpStart); @@ -141,7 +140,7 @@ public final class CHPBinTable // Original, until insert at point // New one // Clone of original, on to the old end - CHPX clone = new CHPX(0, 0, chpx.getSprmBuf(), needsToBeUnicode); + CHPX clone = new CHPX(0, 0, tpt,chpx.getSprmBuf()); // Again ensure contains character based offsets no matter what clone.setStart(cpStart); clone.setEnd(chpx.getEnd()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java index fa24a78b0d..9f5d724bdd 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java @@ -60,8 +60,9 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage for (int x = 0; x < _crun; x++) { - boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) ); - _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode)); + int startAt = getStart(x); + int endAt = getEnd(x); + _chpxList.add(new CHPX(startAt, endAt, tpt, getGrpprl(x))); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java index f2dc4c3d7f..b78cdffc57 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPX.java @@ -34,14 +34,14 @@ import org.apache.poi.hwpf.sprm.CharacterSprmUncompressor; public final class 
CHPX extends BytePropertyNode { - public CHPX(int fcStart, int fcEnd, byte[] grpprl, boolean isUnicode) + public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl) { - super(fcStart, fcEnd, new SprmBuffer(grpprl), isUnicode); + super(fcStart, fcEnd, translator, new SprmBuffer(grpprl)); } - public CHPX(int fcStart, int fcEnd, SprmBuffer buf, boolean isUnicode) + public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf) { - super(fcStart, fcEnd, buf, isUnicode); + super(fcStart, fcEnd, translator ,buf); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java new file mode 100755 index 0000000000..d2cc0ebb4d --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java @@ -0,0 +1,40 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hwpf.model; + +public interface CharIndexTranslator { + + /** + * Calculates the char index of the given byte index. 
+ * + * @param bytePos The byte offset to check + * @return the char index + */ + int getCharIndex(int bytePos); + + /** + * Is the text at the given byte offset unicode, or plain old ascii? In a + * very evil fashion, you have to actually know this to make sense of + * character and paragraph properties :( + * + * @param bytePos The byte offset to check + * @return true if the text at the given byte offset is unicode + */ + boolean isUnicodeAtByteOffset(int bytePos); + +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 66446dfaa1..1aaeec0cf2 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -76,9 +76,8 @@ public final class PAPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart); - PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode); + PAPX forInsert = new PAPX(0, 0, tpt, buf, _dataStream); // Ensure character offsets are really characters forInsert.setStart(cpStart); @@ -108,7 +107,7 @@ public final class PAPBinTable // Original, until insert at point // New one // Clone of original, on to the old end - PAPX clone = new PAPX(0, 0, clonedBuf, _dataStream, needsToBeUnicode); + PAPX clone = new PAPX(0, 0, tpt, clonedBuf, _dataStream); // Again ensure contains character based offsets no matter what clone.setStart(cpStart); clone.setEnd(currentPap.getEnd()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index ab7f8155bf..755c37b407 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -62,14 +62,10 @@ public final class PAPFormattedDiskPage 
extends FormattedDiskPage public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt) { super(documentStream, offset); - for (int x = 0; x < _crun; x++) { - int startAt = getStart(x) - fcMin; - int endAt = getEnd(x) - fcMin; - boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt); - //System.err.println(startAt + " -> " + endAt + " = " + isUnicode); - - _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); + int startAt = getStart(x); + int endAt = getEnd(x); + _papxList.add(new PAPX(startAt, endAt, tpt, getGrpprl(x), getParagraphHeight(x), dataStream)); } _fkp = null; _dataStream = dataStream; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java index 73c1c8edd7..dcd2c18cf3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java @@ -40,18 +40,18 @@ public final class PAPX extends BytePropertyNode { private ParagraphHeight _phe; private int _hugeGrpprlOffset = -1; - public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream, boolean isUnicode) + public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream) { - super(fcStart, fcEnd, new SprmBuffer(papx), isUnicode); + super(fcStart, fcEnd, translator, new SprmBuffer(papx)); _phe = phe; SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream); if(buf != null) _buf = buf; } - public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream, boolean isUnicode) + public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf, byte[] dataStream) { - super(fcStart, fcEnd, buf, isUnicode); + super(fcStart, fcEnd, translator, buf); _phe = new ParagraphHeight(); buf = findHuge(buf, dataStream); if(buf != null) diff --git 
a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java index 7095f0dcf5..77030742d3 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java @@ -28,9 +28,9 @@ public final class SEPX extends BytePropertyNode SectionDescriptor _sed; - public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode) + public SEPX(SectionDescriptor sed, int start, int end, CharIndexTranslator translator, byte[] grpprl) { - super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode); + super(start, end, translator, SectionSprmUncompressor.uncompressSEP(grpprl, 0)); _sed = sed; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 2b15808c29..3b47c1e91c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -61,13 +61,10 @@ public final class SectionTable int startAt = CPtoFC(node.getStart()); int endAt = CPtoFC(node.getEnd()); - boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt ); -// System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart); - // check for the optimization if (fileOffset == 0xffffffff) { - _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart)); + _sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0])); } else { @@ -76,7 +73,7 @@ public final class SectionTable byte[] buf = new byte[sepxSize]; fileOffset += LittleEndian.SHORT_SIZE; System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); - _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart)); + _sections.add(new SEPX(sed, startAt, endAt, tpt, buf)); } } @@ -138,33 +135,13 @@ public final class SectionTable } int FC = TP.getPieceDescriptor().getFilePosition(); int offset = CP - 
TP.getCP(); - FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition(); + if (TP.isUnicode()) { + offset = offset*2; + } + FC = FC+offset; return FC; } - // Ryans code - private int FCtoCP(int fc) - { - int size = _text.size(); - int cp = 0; - for (int x = 0; x < size; x++) - { - TextPiece piece = (TextPiece)_text.get(x); - - if (fc <= piece.getEnd()) - { - cp += (fc - piece.getStart()); - break; - } - else - { - cp += (piece.getEnd() - piece.getStart()); - } - } - return cp; - } - - public ArrayList getSections() { return _sections; @@ -205,7 +182,7 @@ public final class SectionTable // Line using Ryan's FCtoCP() conversion method - // unable to observe any effect on our testcases when using this code - piers - GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray()); + GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray()); plex.addProperty(property); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index 69e1f0ba79..16dd648c7c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -37,7 +37,7 @@ import java.util.List; * convertion. * @author Ryan Ackley */ -public final class TextPieceTable +public final class TextPieceTable implements CharIndexTranslator { protected ArrayList _textPieces = new ArrayList(); //int _multiple; @@ -150,31 +150,25 @@ public final class TextPieceTable // If they ask off the end, just go with the last one... return lastWas; } - /** - * Is the text at the given byte offset - * unicode, or plain old ascii? 
- * In a very evil fashion, you have to actually - * know this to make sense of character and - * paragraph properties :( - * @param bytePos The character offset to check about - */ + public boolean isUnicodeAtByteOffset(int bytePos) { boolean lastWas = false; - int curByte = 0; + Iterator it = _textPieces.iterator(); while(it.hasNext()) { TextPiece tp = (TextPiece)it.next(); - int nextByte = curByte + tp.bytesLength(); + int curByte = tp.getPieceDescriptor().getFilePosition(); + int pieceEnd = curByte + tp.bytesLength(); // If the text piece covers the character, all good - if(curByte <= bytePos && nextByte >= bytePos) { + if(curByte <= bytePos && pieceEnd > bytePos) { return tp.isUnicode(); } // Otherwise keep track for the last one lastWas = tp.isUnicode(); // Move along - curByte = nextByte; + curByte = pieceEnd; } // If they ask off the end, just go with the last one... @@ -268,4 +262,34 @@ public final class TextPieceTable } return false; } + /* (non-Javadoc) + * @see org.apache.poi.hwpf.model.CharIndexTranslator#getCharIndex(int) + */ + public int getCharIndex(int bytePos) { + int charCount = 0; + + Iterator it = _textPieces.iterator(); + while (it.hasNext()) { + TextPiece tp = (TextPiece) it.next(); + int pieceStart = tp.getPieceDescriptor().getFilePosition(); + if(pieceStart >= bytePos) { + break; + } + + int bytesLength = tp.bytesLength(); + int pieceEnd = pieceStart + bytesLength; + + int toAdd = bytePos > pieceEnd ? 
bytesLength : bytesLength + - (pieceEnd - bytePos); + + if (tp.isUnicode()) { + charCount += toAdd / 2; + } else { + charCount += toAdd; + } + } + + return charCount; + } + } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_1.doc new file mode 100755 index 0000000000000000000000000000000000000000..4291d9c1d69cc6f0ca8ff7b2ad987a796923c4ab GIT binary patch literal 27136 zcmeHQ2Ut_d7M_Fz0)kWlK_x^)6bMy8RuL2sREphQYk){k5D*I@2(lC#iUk!FT?=AE zQBlF}3a(vO0a*mavUYUsE$^J0T*WA^?%Vf$Z{N%C{r5~gGjnF{+&Od44QJ12*RR@U zbd|_CSE5ATNmPk)lXwRBZRxNUA@<Kwp9 zBm_nl9ET9D8X+x7V0d8oO>k7~M;8^AK5>NHGiEU;Jj2NVPD4sQxk@CO@8g>D(-}%A z&kEV}LwZeDn)W5CM;8Z*gMh(jvT{wSGsN>DvpP^08Ku*w7{Dxto_NWa{TVfFg|!eO zPw(p!!i4yTK+$~-HG_PGhJ>t!g8hvN83$oqXpbo}kaM&g=z8UnKWPu0kEY1CsiXPP zl4t)0Dt zW0$T@&fV_BjEb7|SkFg5( zKP~~-=eGlx04xBV0CoU-KoB4pP!6a75YPvh02V+QpaxI}XaRHp`T#>fI{@kqZ(h84 z@ucy|z2C3@USECgl=QFqe_Wn_TlbXpL)8NF2+>puN1oEq5Dr7VRj8)RT?pfqKrWiL zHBY6&N5h{|w2Qcmq8G&rC~D(AiJ}uCHdC}gr6X+QWn+xvYo$s^HsA>W$Cv}aF>VRK zG1dd%7#jiF0O)ak)wIw5cA&8-w+BrFe{af1tUqdX(>njre$+u=EOmk#;haWq4Iv@H z)MSCZnarnUPwcWo87|Rvz3YidQ3{`1bj;nfD3i?tw9n3Dn`Mhw9f}AsC_2hC1gK{p zMWt5Z6cSKKKp_Ez1QZfbNI)S0g#;85P)I-_0fht<5>QA$ApwO1{vi?wXXz3hI2~%i z&?GAGCIJ87^bd~(skgo>fPM*G;qBj_As$YQ{tV6WaD}qXrTr)33!uL=Uo=pl{ms$c zUy(O2>QaleChjDV!~*v?GKE-wQeP|yBB4-o7+70j zOJd-PZaBpJAt?r`-d0+Vt+FPxT?{#*AYUXALeB?-FA#i@R2bfi5QAqhZ5=Nu6NZ{n zq}0QP7y(8zNef_QOCq>WB!|%2N7hPP-V+A9!h@cE1y`W7DuWaOl&mscd{NT|<{TRJjp!{F#R)}mV+K!-7RHX^Pl}4%H19hx-#iHTO{H4U$Cvksp-!%_8k-Nbvl1NL&PRE^cUe4`_4KHB-lL;uC%o+3!lN>ON@|6T zZB}_ve&g$x>SoEgErxF_cM$xr`n=ZRdaEK2Tb1x>zdO|JIvv<>f0|yHYpOxjDVD>5 zkz=3sykm1qc|?`wrd@q*UmKZp)by=&&Au((d5p{)BoN{Aggpsi}e7w$>dj>?Lp8haxU+THU0q2hxc8IrGo88;yr$lnq zA}N$-Tw{E<#=l%yt&}rk&BNy#dc4~*^X1hAx48OC3zd&t>)rE2=+*ch2a-~br>$Av zfiq%K^p8I!)a)En7I5yT?x*aG*Lv@=U8tj(QSDzga7(Y?bH}yL_w}rC3^YqxKX3e) zN|W3bt?Rq?zwlIlV5QU8EprQwONx)!o?PL7C~2$J4NsQHX+i(6>w&8ZY#Ijb%PtTW 
z{@U*B&fPCBz5Q%QBPK7>PlXO<0onlKLWNO6n2#gm#by08zPHL#(Z~{>;%AgJPPJNZ zTyWXD&CgbmSIb-7ygkYA_>S4Sbv_lb0}|U`%ud_9vzB{lr{zV{hgP+t_n3wEnAvC8 z?-!nTy5@P;(p-({{Q1T={NU4ZF+8A#fQcV&HkveB+66mo_Py_Y#m1h7``bS<@k$T} zb&NOaGoi3*n{nvC)m7Ga#+Q9}W%;=HkS-2>cZ|d5dV6Q2I0s}JtFQ)#ci`WsWM$=h zzimQ;DRs{=QoY$Y{*uQLc-qg8_xgI|I>K*LP z9?dK*PfktU`(wq1d-l^?+C?NB>|EcccZ~n!48ewa&zg-(=9cUjKS6bC$g?6gep?da&*0=f;~YN3au>#)h}6y!&+TzA5f!NYTU8!l8@oPkAM6zH&Tl zk=wJo-S3Mwe;={?h0fcB-Ik14?zg`Bm*8z48Wq!`PF{sp&ORkn5xeQvf)KMT*vtWPu(@^I<*cm;q95(FfV_2?7_RN#aF62j5%!`HqQH1%DTe)iMfFl zS*`aiJ~M67%X_LW`z23aygO5_*{)5U0+D0>DfIU#;-_v`r`Texhs?2R-4rI+>|U<>)!cDx}&*Hty!PYmF4-l z+p;XXwa)zGzM)s{qKT2_tB$H(8rk{x=Yy~3i~a0Qlr>bGGu^@#w70Y9 z-u?A9*H3Mq)wZ60Yts?WwXH;jWb}H~wkLN@uhQ=pxWw14-CsX{ZmYe$2dG@=o1=caB6D5!x+tU6 zG3$o7q#kHnbMJ~*+tYv9L7nIB9M6&yo^EyTmX@Y< zJl<})?MiX?hkYNE&cFNiR=)?Ur@BV0T7Ge;Md=h#(V%Th<~97>{&B8-*MLD@@oSdM z@8Fn~uQ_wdPu+O)ubo`r)!4=IUQ51S)jju=jdM3-DQo))4sS_V>TTh0ew9b3#nC$) zPAuHfrQNzXq2`aiiNQr|_rRSeluFB1Fa9IP-Tl~(WoPqx#(G&UJ^1~S%_EOzq+H&5 zc+-|N7l)F)+NXYXNguJw-g*4!F2k)hsp@i4_OLgVsinuAGV(Leo#$3l+qLdD>tihp zZu44SO%8TFKjqBp0#V+A@~l1T$yFN~=egco9=KDdWRdx6qu)+nTVr2(j2XY@`RMX~ zlUZlV*62R>aM`{lFVD)oPt?|nBSljx%EM}mkF3()f2`A_QFm5p7Hf&pr*?>}TDkP3 z`H7G#kvFpT_ynZSREv1tEp53=w%UeWMYAG9bBBG;De{Qj{@BLp&?bYm$+Mpf%MKK3 zq+Fft!-$U?)$dr)wT;`0`&nL&e`aU7cIxU3Lytq+M%yy0%Il}KEzI|K>+qXzZdSfY zy}9p*nzERgv5VqvytYg&zMH#N`}8lH{mymIvHtD)c+=F$oikPT4*5m2;JX8z&2QLv zFN`sioEd!XEo+O~Q09Gx$%7cSm&=%%b_SiV1*DzNc$LV=N!`#^%R1V)#a7dTdavp? 
z2G;{5CEY^8a(DcwyYljSQyslm^Rif`2?2}xKUv;u(~773e)EMh`g<%qx^P3Q1do=g z#&?W%7bcXZ^EeX*`EIu#8{M(tOyX(#P`x{*>+{5Z8l9$WJvBWk;qkWP>V5Z2s8V;_ z^R|Vy<=(nmm*g53~#whDow`dBqnmXLVWsD8|8{WtsC`lOx7Pz|aLNS|sFZke*u$kC2toN=dLe52c5x640ktNX>DQ_e9L`YxO8 zcR74=@&0FH#NrEkwdR@EW}MyptI~|?T{OD(G#ha5 zzQz_S4nJM6e@R&B;^oJ6gY2)^g%%odTy8HMzw}Vl?t63G2I~7|R(}^nKx(zD*rCnV-STE>|Av)R5t zt%QrD^`d~KlH#|{MwuZtCixIUksLJgmmu-J0WhoFeWxkEb76t zllqcY#Y#WQ?yMnhOj> zu`YrhywJF~Xcrrs*dW*|3S#-@_BfX^&w2x=wiqd1j5P}*2S~nXeu=Kkq zP)A_`O97)pX_@#vRCA!d-2z1nEjy_JlKEAj45T$W1BH}EJjG!`8dM27;|E~ajkJ3N zjes`5g-A$pax!_2?b0O7#`zGXML3P9>`?_l!Pg{8ji4q2Wi?t*@v4o;K;AKl8dZ)! z88#$h{$4Z7#RKsIFiL$~`y8%FF0ytp9BneZr30j$Mb+sfLgB{6Gh6@=M+mrNRdy@2Cp2SN% zmK<=MNOFxP5wuS2z)B=&#ait=kD%2m+P~gywT7U@s*~Cla$7C5v~}ELZF>A7gP#uVbU|ci65tFpLW^6ipf+~Vzbyx zC02T8!iDH$nh#YGL=1-WpeQ)oiG^f8pc8=(gnIED3QuUd03Rk{vl&c=iZYXpw6PlApju=}6s5 z8SPD-kcB~Ac(Mjxvq^i(Vna`rW##U2;b2MTnwG;BVkrUi(ig6Aa6U%}52l$NXOHD@nd8`}j>mE0kT-aW@_EPchMN?Oof{>Z431dO0`Fi&f@R|q-YDt%w3Uxv`iE+5oW~)S9gC}RnDN~eE-RRx z0l<>k0KBL204SjX0IqHa0k~5*3BV<(3Vkbpu03JLse68Mt- z&%Ai*VjkaED`zq2|5h)zfVxZG08mdx-4##eP(P0bpuV3CKz%o$A z6#=vWy8)=j9|WM;O9cS+{<8q&T?=Tc-=lsVAdVEoz^fnbAkeWV3SxxZK2d?*Qhgb9 z=BD~Io;^wRXmzSyNN$$4g9c$=5R_RWR*VM;*pb0ual**=tRX_-6cSKKKp_Ez1QZfb zNI)S0g#;85P)I-_0fht<68L9I0QFu}c~M`+_vom{qw0(A>p1}YZGSX+Lj4_ebJWxE z-8}03sLNwIzQad-9)HCeex#bx=~3@Ty}dQS2!KCgjdoS|DIKcUSdI(S6u<+t2bcjm z0L%fHZUM9tpfkV{U4WtM zT_esRWmTL_(z6{PUQ|GKiT1Ijei_{X!pX8Y56ZqvAPapvQ6tRv^6epVQ~KjPEuntAhwz^H+WPTqjQ;iRVIY0{>eT~i5PVOGR(>Mc7pNcC^!bnx z)Q=L!`}S+=M^5;E8_==ACJ<{yOF|rbAE;A&!_31s^-$FGCaG hL1^>lAr7MME%fstK&A7Y$&+r|$J3KeLH}zc@Gp8InX&)? 
literal 0 HcmV?d00001 diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/Bug46610_2.doc new file mode 100755 index 0000000000000000000000000000000000000000..be9083140587ab910a18cf4f1f9c85ccdaf9e651 GIT binary patch literal 28672 zcmeHQ30#fY`#<+~Z{1Q_P$_gE2y+|%d#KIc5=oadbLob#UdJm)>{?ff~_ z`gJ?CZV(CQNMy)6kvx%Y!DoQqnhvWF(i{B9dnXc!kZlD3uhjhyY2e|;BP20No<&Hb z+9B*jLZD~CaR^E8NJx9)7wQ*!2OP!y=px5bCpPMY%$~=f5XTlrD}9veLqH@_ejjMd zPbVm)JZqYVAJUs0#BqN_p~CMSb^-=|YtGk_+5z7T5-S3AXr^@hG7Um3U|72~3;P_^ z?S!QeB1zwZW|+Vi0Y&#E)Bw_*&?IC%WDM3OBoM;-P#;qiAmL~|(B(=*e^MVh9ZgYg zOGnG2Dcx>54TftV-~;)vNnM-UB}q~KaEOQvxyp9<=}}Tb$@>i zSXf$F+w|^ZYiDoy-e>jRXZ_x1^WOK>Ih4LShy7RQke2b~Jt)~ZM-06-_J zw&MOB{CsFHEf)`{(f+pR-Y-ZanKu~r>fHg7{C{G&2!N>k#009SLJ00S<1ynAB!=*afCRub z3a;W)xoQC&8TcNn5=OpLk z9z){bB&}q5(ZrtwL&+g<+7@dP1y^)qfb*-U7^q@baXA*U%20P9q=vbg75JxWaS~3kVHbwPsFHOVR#38PvkawAWPDt%&kz|_NB9N-pGLYQIkUV2zB=v#V-62>j~yr! z#P9_%6Gu*u%KLu86IYq9sTa7 zvMe)Av*o2FygP@s^=IHm7jW}^q^gZqHs3uAD(VBVIOHZq@x!Cd&G0&V;WGy8?+*dc2+~umT zERa2Rb722d!8c<29ZF0-nYv+fch1;pk#oL}uh~7Z+~>mgea~2GZ*<>du~bbt{i=8Q z@a+QvE}T@kIM}tu#?K&e^PK<}I$C zJOM9=ABBTM@wBJHF|LZ!jf8bmv##3uml?VG?K-u+>^CwnSIfB~@8a5;s@n}IwrA@) zUS2gs>06UrIi*bg8ME}##u+A?wezpJclp62{6#I%gPcHxF`s%>gpx)MA_q9V8 zy1S<*+xcW@%dtj=b~n3S#mdZcf7`Cyy149mMwLN`{&&s^?&Zbj%IhXtAB=l6;q-Z3 z)y2;?Ji2UN)z?LBt(=W~&Gl_O(Z!W%8;*J;b@q0zS*3Pwe9bh`fx>CyT@4I5zx9af zT6(~JOH4wD$LpebxA|)pv#!tIm%cN3kg6G*sl%wvF*U6UEgWLGDI-m-i_F!oCizQV zUQ}OXGqiku?u>a8u$AI~VONJ>fBKc{lbL#tWsEyLmu_o^Q>Fv@#+I&Vw8 zYfa&bg{38vr^xRJT9xx%QEABzq2twq;R7R!!lz|Mj%^ z^bM%x{4;zxp_6)D?lD`3Rke=`Qf#?%C$2u$w4-E{w%PS0J-6%>`;yq;Lu-ebbN$cm zF}@K-CKmD=J1eYT$1y+Sce=xFi)z0a%jY_BZ5H!fJ8an0v$MaB-oCjFi}J=qAAZ1E ze!aT;#IvR$f$p!9Hx)ce$nmSp?0jJPxtY^mJ(PDiD0=?#-MI?oZe8j`C)J{2BN_}$ zp2R(^y_?wYHS6uGTQwTWhqj(LW%aI9r~lg*%gqaBW%ak7ye9F*%cji>*CxKbs#Dj0 zYm!i*Z?9u%Hil}o27`jvR^;XE%rv&|obl@;O}Ct7Q^O6{9hbi{uGcS3BX8viy(~|a zH&kBG-Od)a%h&Ne{2;q*!QmA5p&hr?&(P28T5ooD>oM1jodm%Xx?3yv@ECJu+|VEQ 
zCd^YVoHljx&6lON9_ym+@a`F1pP#OOr`xfNqq-jH173IS&)qPf?3*PHv9%iy)-PVz zY5%}sa+d~YE8eTj*mQMMgjUMLO}-8(hq~50yzbWZY(dF&y)-kmUCd&I8)>uc1Irim zP^j=VNV5y%r#zX|$yeS!N8!-C2OjJ-?d2QtHB^ElIt`y;;8eHxl5mpIw@U?e{7coY z3a-ip!H*}MJymHs^ZfkC+)+F^n~Wc;m2WX-8@a`g^F1fWs^OYL#8+boN&N z%o!~&!<Jx*mQzbDxJ#+FXUOCi~RY4p|CY_7u$v56&6=EvLvO zde<{^+ap^wHYUw~K03>fuataaz6T>VX8e#7{x=JE6%R4K7W_Rg}1Ej3Zs~h7&?!m*lrFJYr1Lly6Ka8G5vJ{ zBRgeFR~6Gp?RGnK^XuKNzR|elBPz8I3dt#%(_!tk&AMtGUoXmJ>Bjpk8~S|pfURp@ zmG7poI>o!Rm)E|pwYMO?EKQFyWrXK0t4WbP8qOu0wF>TdUw3n^&`YUj z+>SG|662rkJgGQ%-;`=an|*KFsT%Jus1@$Hd!Te{`b)oK#bLsN_wLWPxbV7}j*J{~VaOle-sE|&#uvUh)z^BxOp++sc_VylO6o6u3H8dXmK3wEuFmbNW|WU z3!H|ldu3exCZK3i@&cjlqfSAKh8TGo8Volwv&@NV-(zrg?)l7-uadUgeZO$jExqqI zIaijQcW;`KkoR;IW5y2#2mDmxFO$y8d{&Bz-`Z(q1exookAFAJIr?fU+i3lX)AjnR z3WlTDVr0sXjwK79hD?`;glh^x0zZCYe!k`ST%T zM_43(vYu$6eo0A^YmpC|zTA4`7kQ@XbWaPrp#Fn-BgCXHFgbhm~{EmoC#zX6u z!zolk`u2yF5I2?|6&)fJ^wYC6v(V%61^&W-5J6BsJ>QXTrgnPV=op?LfEO+l@cZda z=SS=HcT`g7%X9D#kMiV2azTI~+JVtI6OoU1}#FO#|;#QyNemL zPtV*DwU-WG=7PzMYfKox;_qU?9EAZa2J{YPW#S?5wm^Nm3x*h;{FZ}sXq2HJZ8L~# zv_l?|ZNyz1CNN>tfX(;;7^fwCHnD876n3XpV6qCn*%kO#j)#`FV#sx|W&xLE+l+eeUgjorX|2ntXkr?40z zf-8rxWKaqHWf)Z1s*RX|iUx`wHX~*~M^Uu_i{RcHQg_@!sut#nK(3t@6=tNT7=m+2LqRg3ZlIVSo)8 z#c&{_qx+LVY*&&tz?HZuMw3I1Q%R22G-B=y7!gOhvl0lNR_|(hK(>R!qEgO^@W|BFo2Lug)4GViL?`^!qj-5G~W#^{7?WH z)GZ4{F`2S5Y!;g-!%FK#xWG=Lv=oeFd~l#6>*xO>wZYDMhUHq(Jnvv8=jAC;F7XK_neY-p)$bG`>$*mPUhvfPq@aw z-W?%&ghebl^E62#o=)Auz>Kw*E*MVu>4G8pd*Ou~2JDAEWPr|bV9R9Tqvc~R^{@f1 zt_-0j8cGy|rS5AqkipJimyi-NYnB?$-YwiT7}|*ci^JX%JC0S#Wb7vnd4oGFtvinU z7>tl_6e%+}20nz`f-wLO!Nj&eeIo&nM*KaKPn|C0mUJqx_4I)~4WbdW%Y*icMDTg^ zM@|_AI3MoNP#DD#Fro!8qFvz{1YF$5{apMX50M1?y0r;?Gfe_S+ z3Q(Lx@mDu+L%c6gcve&I$Z*x+17R!^-#_5K?Ob@0MfV7OU;FeZXZ`$sZtV1QtmngGub|0rR!FffMe9T*tm&*zR7Mg;)h z2|(6VMI41+grNK-`o8$<5U4L`2Bo*d%@pS*XkB)Xcn!fnH zo&&)5^k`qB{f__V1?_ZvH;;Bd+VW`U<2!t`=P@4sNdaZkquq~oJO03=76AW=0UVMb z_$eKl*E#?$Ko@}d^nn@xx&sUWJpe|4o&bzD25JJpBWY#;bAScF5`gh<0XEHk3?roO 
z^BTaDVnR4YB|du+181wIgBHKssl(|Jx)j`Vk(I~MB;MNr;zk9UFBV$^sQ--72f|6s zJQwN=7q2<=^+bU%-48+8kP`#vl7zr%+Zp`{{p+DPoIJp>KWX{b)yEFX$9W6ej*b%gnM*H>Z!Vuf20{xX zU=Pz9P8NoKCg-QoYA}0>OBP>0P0>2%>+Z7FWzgI|Q$C#+$F6j)Y^lM2mY)H=F9Q#F zY_;UvjnCj#&$V&wZB60B2y0UpVc3)i_}GD3B6yC#GjA?Jf9l>szZnFmcr!cl+?N#4Ojr+9Tk=i}HP>5dzD-e~r9?%<(Ahje-rup9e) zizKk)m(O`u4?ip7dAkyxRUvvFCCi6`-&N{)4ZV@$M~=UGz|{l%vo*d>i8S>(4EMY< za^igUCDpim*ukMgr&pgl+P=GCvAOlW>8o}5bJMMG!ygx(+tv=(9be6JO?hy4hCN+-tNXT|5tNsEu6DSRPx#})372TZQiQ`E3xbR&3!^n37Zfx|@8NDZ*{Jb2a|8aU(+@A8R6Ufb7cj>+r zd8Bhv`lpWz|I*HO{0j1x+}*LeZdbqjw+q*W)- zk8XRk=iT}3q^rI=IL5YiJnq=Z*&QEk>wibl==s4bMP_1J+j(uVhIB^NIiVTdDdtD>_BT=tcWSUnH=@7|_bd3~8 zx^b>IXLC5t<*yTG3L@!{#}j#xj7XYy8h=r*Ei~b5)a%CezR(q-Uao()j-y_C?&%&W z<>_MoN;aV>wsxG)=b3?A(H(h__I#Inog!J>(TRAcaYwqB1B%aLOZ3AiAau~ONX z@H5dd#ibIna#e^{_R4cj9qLU!oVH6^imf+KO4gEPAU%cG#m~FsWinAbQSR*Er%zH- z-t&EZ%_J`6PkHV~NYXB))TFBt&ErXFV0Zt{Jjzg4r7J4Ia?06kJh&3bJAtqizNJz3 zOXnqTmscwbc^QjynpZ+xx?kzgeg?TQy_dG;gIQ%d=aut(o(;=5Y;k8Jaf`co)m?hN z1Cj|mOifAWR%uMs3!as?y0A(&oR&(zN<+4tge0WXCbyt$%pc|X`s6e?&>h*ebW5^R zu7dJ1O;%}kB@caliAh(Bk-Pk>c@W(-%~KiL^OD>^-(#5$nWUt#VQ-%8>%7jo+(`CW zgd#7mvd;9&RT`QT%40HDl7jYVB2pvYaU`FudO#MUQZb#DRY~3*$fc}a*L6mzQ^_em z<|XA}6tiimQd--0AD(xmTplOt75ciYS}(8!S*`R`k~XWia<%%y$yMoydVP?iYFE`y z?q2Cy)Jq_ZioumzFLKurnaG}l`lN%e6Xwf;+Ca8#wLubTF#jY;sI<+ycE|fWFVl>3 z(k9iP{kdCmvpPyLb+%`I#H1z-GP_otZtVtnr()l)SlyW7HRstr+?N+g!7rQdx3J~e zoxh#tmr&QZF|ue(~u+8eS4S+&(A%B4z2^{-@RZ6k%; z%ulJVDb#RU6|PyIYM`i>=*vLqmSw8^x>9O-(s>1I70D`H9Uv(X{r4w?a3t-I%93qBU_cVcr$(P)wfD6&I79@Yt}70myKrn z`lo%hkivPF$52{SLXwQy5tdG?;m!9*yA-0@yHV0CuiuTlh2@(<>Qx`gi%moJHKp9% zPouTF>`Bw++HDX?L!mfd9kd$q(OT0}S#}h!Vs)dHyGc%RtKABfp33IT9*$nehNWVb zl){rLE2jz4Tv?BNkF{Zfno;GZa?JGgTlOYjU?~mvBvjtA8|jwWoxH8PTlG?qjwZw< zjgbas;DKZz-T78I$Zxr_G5eQgs(zNo9_Ytltv2)FvS?@LvT6Ah`K_?OO{UzWDXMp* z=_V`dQ<q`>71&3nU}XZL%yK{*W}Z+?H0;Ib3!TbZB4nCPf^J#r?RqgZ=m0=uC4p#ZzQu! 
zU&7Kpyk89d62K@#-DtH!U)_)}paE6qec&g;lyKEs*`m0#$=p zi^AE9wStrz>3@#Db-i`f>n3+=b4bU7T2U=g)pTk}T1(K5iz!*1F>SCks+{%bYO)qQ zsrI0xoZ+`{?m)3smD2b~jtQ4p{AuClIE$HQ|1*|e? zK_2>YMY(knl!lnrDmLY~wk=z$=}e?#Z5Hdz+Q?TP5>xGNlF*ULORYca4O&~?G)Xos zJt-w5^IGA&B=e-SN4{S+r+btxc?d~f-mbIn@A64yKhDUzNOx3jl5@~jbrz`{nOytw zjOsi2>u^6n>6YCpwl2uVTA!{xYfqPQ&DBuW>M~qtNB;f>KH%)Ew#o z({tHK%|5SEXkJ(SPnC|#u{pl&4wNzM|tPN)VTY6S=gw*+l zpw(b4NVUy^Hk`@ALY4&u?P048&64C7l~0Y6NuN7X_VPI0k)7qo^gtSEEemOc#UsnH z@sCU-UXD*x*(qFCa`IHNkFez=aAds%tF4m!^0N^Ym5I`7EhMuAlUunjS?h~+@+IwL zX0qzSAAgBSMn|LKbAu6D}EizV=3*5-`blNzm8Q?%eqVg(lptR zY$sgfo8_o1I$=@P`n6P=$Cj3=6s%pMSmdE3eOW_yz9kV!F&KY5mpC;pZ>1p5qV$;j zOh2L=OaHo(O4W-|@60N@k=&*KN{QK&J*}LY)&=J!S(Ur|iu{E6UzM=J(A|nfHmlZ| zOONs;4=Fu2`|n<(_EI}uDR*)WM%R>*aNk+-wUSgzLL<;h!Pz`({WaGE*E71(($AS2 zF*QG-RLHa1k<(LoTpQCX#5V+*k;>g_+equB|B|bGx0P{FFDSM``qYwvLRYwM{7=%+ zd4(WvudPP*oXjjW(jJAcm~50zGO_kz;DgNfs6Gis(1O&qBp14(Fr>3)JJJN#%91CO z-^(Xt$;^C~i&u38BT~L5sMM|R?lez#OA@YkU=mb{BvGYFe$A{;nxJ-}aypQ$*;J6y z7Ua!p)Y|o(>JjC^^Z{ED<;)&YWb=KrCeI9Y5^WTav{yqD4X<4WBk+5b=hWV zRg2NJ%blmTax&}AriLjd^zgWHf}i8SYbR8t%=J-%mWw3hG+TbxT8Y-WQy%0S)caIz zAggs6qB$FDQ%C)EK9PECpdmDZEXal)a3f5Ic`zUDgT=5G)iE?0`y0Wu?wJkOkS$0}7xRN?<5l z0>fb>%z-;$3%m>4;eE(pxkeVufLmZCJOdkG$6x)wKmOn9zo&xT@0>IJ zr;c{rh0t5;yj0EwundM$4r$2vO<07Cijc*6$YiBMVnfdx%o45$E5znPrM8Wcd4lKd zhrz7+*$cy2hSlIqt^rHLyjk!jlr;0a*I|EHF8g8!-d8Atuyp5j`2{u9{?1FUsP6iBt2~ZBV!yH%+ zPs2J`4{yR|*a4Lwf0Y8MkPaDe8_a@*un3mIL$Dq;z-#b2Y=*Z%zOD(rt|_#EwvY}P zu=8F2Z`1nY_^Z12C{Nhc6Ric#toHKZN~LwJ8X$blxqokf(w_}EPzDoVDqIh@!7PxE zUk)o^3v7iJ`1_V1-#-*Cfy-bxl)(g;3fIG2m*1|e?1vbHcH~5ffPuEw$KhTp$sO#1F!_1fJ9cyCc!z-25yAua0|?Y zg|G;g!DFx&_CYV!nD>DJFbIaiB`_Zrz!F#r%iuA14PJ-6p!Mkm%?ShM!aP_E55Q_z z1KZ(!Xwd@whPKcS3W4^OHy+Ah2`q)C-y!Vn!QX2y@K<#l-Vg5h{5{Y6;ud|Z*5VQL zYJkcvqpD1LAm5hA=w5gqmLrEbfZDltBf|fcs!E?1J6U06)+WT0l$4fJ_(!gJBX(hPf~g7Q!NU0G7Zecn#i# zt*`?sVLu!Ie6yIrj=P5&C|H7MlLmi zsc~0ZFRL*|tub}um)#j5Q~0`5W0)GdQkrcy z)_Q>M+|`Kif-|-6Rvz6-!rC*tR?@c8S+ki@LNz_t+@qC<);1_*wq8V`DW_V?rFB2K 
zRXU`Q^sLQ2=w7o2%|O%~m##eruTpk)qfvUzDrnW8_B2=M=Bs7#HV+cS<;q%;baNdx zM=Q@^OpIp5lml6tav|+7olq_dkf-J#q?2w=L7}>Gw)OWmUo2l|Z+g_0 zse3XgYnzcXo0C0h=1dl)l*rPg56*s^+%=n5Go;$KqNi>BuK7@TQ27m8$r7ZwwzVp* z0)M_;E1<26HGd=D;Jl3F>g>eI+s*g{cem#1v67$sjoF1}11bL^W3=~H;9#>vXI&6ZR@b{VSF#0_ zl-d~E=bZ}J=}y(H44(vxqH-Kl=I+SdH5(iY5ECm{v-4An!<_c$*pomJgorKVUV zZOz!*3a>(*Ro*2vAz->o#6 zXI6TGbv~*&q|qva#)Q$EBhA>WK5Ip6l9seudR6S7P3DX=)#gy;eWYu4Ei9dB)Js}r zrdeKvZXU+1Q<7(Mx?t@;r6SWW;XsFKzGdfDfJk#qHma{xrcURqB^9KiIdR+Uy4FDG zDbsXY{bseD%^17&PMKaO(qY$SH8xW(O>v%A`XzbVb2V3nG)H>DI}WM|q$5`UX^oO{ zsx-@^X~y4ILbLe1Y-#xrV_@AaNRig$ z$-b2$<;gTsb*)x}**j>iCFHck=Ep5$wNq42n@_WlY#o8hF7PSgRbGYIkon2Ld&!P$ z4Vs0n+O09E4YxK-E48*n8fpC&(?GX|!>mzxF)gtcne;#_Z|!-lE>q}EtL4Gub(}Aj zO`ASjD^D`-81g?_O)AYd8A+n%uPqkqjai#dWhCi@-@?=xrO(!~2Kug0Ow)r>ljhkQ ziP9^VPHCoAfGB3GRS%Cvwy*bcrA-PY+-GQsEUdm%8ECDJ`8La^yn%F6Eq1li6-!6I z6y%=*3%5|h_Ak$1`eywKmtL*Zlv0KM{cxxe?d*xcA0_xM-yK*W$$p!#tdsgtj= zRhz2CYqk$;^_25sTK}Wiou^a%C^@PQk9w)>t5g(Tc#UW>N74ybT2AwWw%g(5r+&2R z_O8UH@a;XU+N&?s;xG%e{*GEqlD1n9r+Q1aW33&n3k&oj*$-Jyt0cpt1bRb8Inh{x z%29F1>&P#Ly@}E#i#MNWeL=Mm^sKz8JcMcmYfY()Tn#Bbuep{roGyiDC8=kn57x_e zBU17}PT$nqRnJNFkJZf5Kb5o9p+UHkB9$|}iQCreyFQ7^$>Mi@&SYDAO?Ayyp=;f! zHj`!qvA7W}wb-rfZS2gfO4?-p-K<1qs+OP5o25G`$#==yNS4GF78_?L7bTrr3B*{rRb@H;hNk{G(>7CV)POGJzVV$;opXR@*5w;_Qqj&xa9tR^u3CR*7S_4M5Ull8&s4T$cB--tYHy`Yx^L~7WUgt&w%M-a z($cq3-KA0tk7?=&vsm|Tx9-+m_7=wB-qiX#ZCI0>?44I7!xTc%J3FSWL73KiS3N8_ zN^@-Gt!pC|;BU1aUVb3WbNVA4w$;C?69O-<^jqy?`k^p&PWe_krB5a~NnY(Sweal; z^(56Quy&u>o7&Ry1g<3`dC8ick4xjtfJTuPTyGVLM$U{hizG&xuq~u@5alh_M6NaF zj>eG$&eGEHM-jQkmBx|Q+;=7|Wwn*1XGy+v+*;vw__lC)9$xF3ZOA_Dol(7s6Hp%Qmh3)UJA*M>1|qDRkfo*`qXF`Y+2+ z+RS&#%B{7m@^EX1cKBWaw|aUr8$DhC#3)%s`- zYYA+E*WgXq4BMcPIr}0Q1cPBXjD)c;9;QPDybfQT1T)|k zm<#h@B|HP?ur{C_q(LTRK_L`D2@HTiFc_vo1 zI~WR=z!bP1R>K0Ki(eFRP zI*ntl0zRIVUPqo)^af}x#z7cDJ`Td5Zj>bq%VXXhMt7%dU`7vqfh?aq!y^ThRZrvv zi+izF1TN`~9ARi5=1}3XzJvqig*?{}-GOaTT*SOQd=}Qj*umE)kU;Kn?0!f3TLCj* zF3f{^lz)9lfF_U*888w?Lpe->Suh*s!2);$mca&i33kD5Xo?;sLKbAh47deugITZ? 
z9)h*74y1Q`U@uUCc=aF?vY;0f!ce#bq@UBF0@lHLcoR0mc6c8Sz(MeOu&xaie+eFe;b7TAAFD1u_BfEh3c?u7kt0Fv+zEubBw!Bn^&X2EQD z2p)meum;|T9k2^_Ln3}93EDzC$b~#8gd&&@6)+zbz&cnD@4{Bt3;W;z9E2?VQ#O>q zAQ%l}pbRF!7T5}vunSt?$D+^<(x4agfsrs8#zPr6kN!Dz!0{{(_U}2tGW5QT|5y5_ zz*LwGx5F}c44#2ium;w`+pq=p!G7@Y@9|Iq17JLq!HqB-o`F@c2G+uQ*Z{j=H|&Ld zK=Z__3+*5cav&E9p$KNeZE!oxfw?dbR>5l62(Q3?H~^||8bJaiLK37vDzt%ikO^6k z3wbaI27~IUwXhEMz+T9uzRH6>PzWV33@(GIa7=qXnr{gC11%M+4&!UKf2IF=xCLgy zLRbX%!D3Kd{Ss`1y#BaQ=mUjN3?(og%3u;qhPf~g7Q!M}4l7_4ya}6OJG>8GF**bT zU=R#}p)elGU^-O5LRbW^!<(=f-iEEP4feu5sDB}CHfRD(AssTH0%pJxSPH9PHEe`e zU^6&7_FHDZ`<4DSRW;m>-dL+~h5Q`vn^)e^-`)Be?Y}_f+lz8gnY!{*Ijx2U$g?3V zhZWEV85P2tRq`5Lg8ziKVGER(5(eymO3?3ze%r=tX(S2>o2uWnVE^!nbM=#7oq)c|keh_x`lx;hr=h*SQ+<4*_J8EQ z-SM;vywnT44s*uFbxn_NwQS=B(Pdk@B`@0;p8~Dtw1`igb8nrqm!TybCqjcc8{@x9 z@x;RZ+9fbEzLA&U#d!<8(>+{6)hF0XH3SWI=j5^th6 z$-BxM;gxafH0Y2TciLKTpl?!-~>S9_L--$ImbS`;iuT2TOM!`p_#1$?PoeyZp63;T$PIy&rcnT_El2e;JUk zk3_W}=iPh#T{=yNSjX|fF{jFTH&)LwA_H;Sss2RSONOEF5H#cRRq!&LO$3+20%(8? zmP!t|o+7vo*2B5T;U-uLpF(8H@IfNS9`(Cp1uOC-<|ErES7QV^+f14b>jXLmcO!F5k_pjYQe8F^n8Q=Aj zhbYjhc>R6cV-%8MD8%ZtZ%3UrpR)4*do67%s?a{4*fov?R`s_xNwhWY#tJxPxdHGgQA$2HdKF-+^tHH;A7;QzxE+2955iJ-6n+E0g=b+Etbr}C4gL=Q zfJ30l?K-##egO}_5_kw6hZXP#coBBOF8B~Wf{)<SoO86)2g$RPI56vJ6nnMbt!Z~m*oCn#^75YFS^n?CT z3>QKP41{4Y9LB&EFc!+;8n_OA0zZTQg8A?;JPvQcU*J914*!6iPzk#rAwn8K|HFDJ zbbu`A4TVq&7r|h-7>2+wmYv6fU2QR{(;h(S@PGzjTA)F44 zArrEoGvq*5$c1ju8~Q>K^oLRy4kKYKl)+Un3GRmn;SqQoR>CvzEUbdJU^Dy$w!(Yx zci0Yt7Ha0@&R%i&2_0Vkh~UO*?v zfkGGpli^yJ0@Gmz{0L^l9qb_5e$Z* zFb>M$CvXQm3#;K}_#^xoUWYf}FOb0e#97cB^t;*ZAssqEHe3k$4ec_x2L1$Zz}xT+ zRKgzk81};H^$8Q2!dZ|7IdCyt0(0TNU>+=jCGZG50Z+moU_HD5FT+Oo6KsMvU>AG@ zd*EX@RdXnC1~h{dI2+o+`OpqBp%Zk4zR(YfpcLG1c3;N+_uz840&aktVJ6IipTqxw z2Ve>O3YNlO;BW9T?1N9>Ae>A+&;VLPTNnYOU^I+}8(=!jgjw)oxC7?Gd{_=o!)jOq zufm^U8~g*_hkwGq;1k#n2jFDQ9lrcpRPp{VwoRuo70o^RO1y!wc{tv}W#3 zzYBaeoC|H>JZK9YpeuBT9?%m?;9?j9A44cEd-o7ybpGKwTOL^`SAG1xe5xQXmzEz;KuWx4;j<{eJJA?B4}< 
z!{eae@qG%Og;nr8tcCUP0=x_x;m`0l*am-x4`2`MgHPa7s87S`RA>Naz!10;t^@t{ z?+s7^GvHR34-4RKSOoXMVt5#q!EfMqpx+067FNMJcpr8`CG3WeVIO=72jLK?0Tu`G z&=Qg%1yZ386vM4>2mBQ7hKJ!%SO$;5bMQQzMx(7EoDOHeIdDF7haQj*y`c{jLO;;& z4c`bi!E~4fx5H22=Wq`!g~#D{uo`|3&x3xCcs-m(BlHYt3Q3Rxsc<%&2R)z<^o3%$ z60U{?a5p>%zlO(QIXnp~U?=Q_eGo^R_*7^DXF^kG22n_c6le`?;R5IY8PEx`ARD?s zSLg|Spf6kq{{h2bIE)1Srtn0#2`b?G@I$yC9)ucpgieqUL??b{EEpae?cBKS`j1>>L$u7N2q z4Q>GaF7ppSzsWocehhcO?_d=?2OHp@@EIJ0I66-8&=B-{!skE^^n(%@2t#26tc7*( z0&Ijo!X|hX-iCK!D`;Ip3wlg3Vls&7KuiZ>IuO%=|7|*;AvA`jJcfNdUS0L{%IPQ= z4Q}jB<7{z^ld*i&I~xk1Crkm2y}2>9d;-$g**F*vx591kNB9%CF)xj6<$}hf?t&*_ z1-Nl3tu6f*G-galV^O_8V^1ZZu_ui!{TSxMUGNrchIWk8vj(1@&V+|U^_$535Zj9oe>}yOSnLfY99yDf90h>W%18z*96@Bw4xG{k1+1D7r z&p>_t-+=o3&w={<>hCAe4{i=EU@E9Dz8=(MA7*9_E$yb)ZVunv7J_5E_88@RsRf3yD>xV~6E{htDGeJPezngi+!Ers90Do`KD^?h7lW*&VL^i#@`2@Ku_AvYOCJ{YKyzJwc6C5LK^MY3&6FZU0d0;fnA%o zC+*GNa5H=#*1>v6p}m+2y`eAM2lvBEpmwI(lP5F6|L;N0H;~V7B2Ja3K4;*m+JKdE zovO2Tg!A-paQ@88d%Bqqj@Q$FvZw!;?@*7pia75cPKmMiyc(CwpZ4`}+xSd7wy+c{ z#;!&1dHo+vuVXuic=cMv1g>`?oOrKJ zow)e8lk3LUF$q?sp3C}F=<6bH8qbX6$uf>Qp5w=UYTZb@K1Yo`pI0cx5tE)NK=AqX zZcFrb={>Ts5`_2q{-2&Bv>)%wpf07e8=1xXFMLG2AIwSTTFtXMqfhcl{)m|!#J<0< z1Tw1C*@E~0O%`9~BSW)KT*n_YRyoz%EWJJ2r*NpQwApp&^f)iBJ5>UDz*{Ic9}O9- zJ{kVE`%~sF@dRE8x&|%1h*yFpV!vbkb0h4=Z{zntTnmv`v& z`}i9&x_h}mbHewS^V#ajdqD-e4&+%kK^@o*G!4By;CmIGm2+yumZqHF23A<<9Mg33 z2C${6<_%*@v&(l7G_kzN9OGR*+%j(|(q>>Q-#UjaO%-oGTctw^qq*}ayu70PgHNM- zrtN;G@`D3AKm9oRe&voG@9o^ZWp`zH<-2yD(yEW1QG6puOula1ge$Ib=fc;+`^rPf z#7S3;z^A!qFB&`X%CX}nj0is~#ng40G2VsaMopS{&BQU2qorfUj2m@%^t%%$jplq0 z@Y0i7k_FpeckQh@)k9w!_hn5u`T4l;zT_Ma+R7zrD`%;#OC!6nPfP+a3B)82lR!)Y zF$u&Z5R*Vm0x=21BoLE8Oae6|aIF4+{Hrg%x-hMIqo4hp{(rkq9#Q{)FdbF(mDOJz z3+j)TgZlfkLH+%?z|;i4Da4!xEC%)K9|HB;p8)mSSAxby*MR!@8$e+v5xDxeu0P*| z{hkxct{5@-^7aEplwCEkY}}N~Ij26a(xk1P*H(j8+HO1ZR6kC3kGgG1ViZp+_?+>A z1h8c--W&%l))SRywOOj~BQ6{oO4)x4&0?fpf7>Oad_p#3T@tKuiKL z3B)82lR!)YF$u&Z5R*Vm0$;TR)HikW$?CnTAFtl4=Ib@jtv;*f=GA{!-(3A{&Bd$V 
zufDwc`I^I5e_s7`mj3yDdiDD?@6A#?|2=y5Zac5y`+Z?`sH2b!DbN~H;cPeu&V@E` z9<+t?p&hh`G`IlN_dm$$5Y4l9giPoJT4&W6vY`v)0I%tLy!!8TgFNUCJs=+npeOW# z-p~j7LLu~nBIpmrp!iDImcjrS2!r4v7!0~Tgza}>DEtRp0++%t(Dl#YdsX{77T?rI z3&AF`xFt%?_3O5iSmbpgpQfxHacS4MOWjkXO`f;P|Mo>`dwfT~*RHqLGmK!rqUvmi z(68oJojRdSLoeQsUwW%_9M8iYxam?U2%qng`$??O?!|T#zyGU+a_(1t?T%xAClP-f zw>TLb`|jXshvBwg2hCBeT8t;z_apW(ewuV|>;T(*0>`M*%X$$0tH zIV|TVmY(b?`JYJsFIL1=@yB6#T8ha3^2su*@w#$Cp9H*@i~D%4!#8LVuQNb(b+P+(L^byysMSD#gb?sG`PnhgC4KBP$ zYuPyE+qCx6-1W3!U{$JYKiCISM?$49J*IEBB>$#)-MC$)5NK}P26TDdce`57* z?ehGx^}qbTib8ESwIO2d|1aPE{~}&|46;|h!1V{Ty_|DjB&-v8#?}AFD*ssf;6x<) z?Yj49bJFTdODFV+g7hR6_*AGLu8;i2GMt9`{`!5h*yr0Pfp4P!C%xGqh2j(I|Ct=V s%y-!1oM<@Tbo>8k>yM-(wbvi@;#{3tYm)TtPi#%n(Oz@<{547c17&3h+yDRo literal 0 HcmV?d00001 diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java new file mode 100755 index 0000000000..f750a59a6d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBug46610.java @@ -0,0 +1,72 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.hwpf.usermodel; + +import junit.framework.TestCase; + +import java.io.FileInputStream; + +import org.apache.poi.hwpf.usermodel.CharacterRun; +import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.hwpf.HWPFDocument; + +public class TestBug46610 extends TestCase { + private String dirname; + + protected void setUp() throws Exception { + dirname = System.getProperty("HWPF.testdata.path"); + } + + public void testUtf() throws Exception { + HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_1.doc")); + + runExtract(doc); + } + + public void testUtf2() throws Exception { + HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_2.doc")); + + runExtract(doc); + } + + public void testExtraction() throws Exception { + HWPFDocument doc = new HWPFDocument(new FileInputStream(dirname + "/Bug46610_3.doc")); + + String text = runExtract(doc); + + assertTrue(text.contains("\u0421\u0412\u041e\u042e")); + } + + private String runExtract(HWPFDocument doc) { + StringBuffer out = new StringBuffer(); + + Range globalRange = doc.getRange(); + for (int i = 0; i < globalRange.numParagraphs(); i++) { + Paragraph p = globalRange.getParagraph(i); + out.append(p.text()); + out.append("\n"); + for (int j = 0; j < p.numCharacterRuns(); j++) { + CharacterRun characterRun = p.getCharacterRun(j); + characterRun.text(); + } + } + + return out.toString(); + } +} -- 2.39.5