From: PJ Fanning
Date: Sun, 28 Aug 2022 14:16:01 +0000 (+0000)
Subject: [bug-63576] support capitalized text in WordExtractor (HWPF)
X-Git-Tag: REL_5_2_3~25
X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=913d1eecf52382f706d5903b2fbd0ae9c8770494;p=poi.git

[bug-63576] support capitalized text in WordExtractor (HWPF)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1903738 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
index 01c4bed4fe..d6f410d286 100644
--- a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
+++ b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
@@ -52,6 +52,7 @@ import org.apache.poi.hwpf.usermodel.TableRow;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.util.Beta;
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.LocaleUtil;
 import org.apache.poi.util.StringUtil;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
@@ -445,6 +446,10 @@ public abstract class AbstractWordConverter {
                 continue;
             }
 
+            if (characterRun.isCapitalized() || characterRun.isSmallCaps()) {
+                text = text.toUpperCase(LocaleUtil.getUserLocale());
+            }
+
             if (characterRun.isSpecialCharacter()) {
                 if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
                         && (wordDocument instanceof HWPFDocument)) {
diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
index 2e67e0ff03..4ef67c44c8 100644
--- a/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
+++ b/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
@@ -402,6 +402,14 @@ public final class TestWordExtractor {
         }
     }
 
+    @Test
+    void testCapitalized() throws Exception {
+        try (WordExtractor wExt = openExtractor("capitalized.doc")) {
+            String text = wExt.getText().trim();
+            assertEquals("The following word is: CAPITALIZED.", text);
+        }
+    }
+
     private WordExtractor openExtractor(String fileName) throws IOException {
         try (InputStream is = docTests.openResourceAsStream(fileName)) {
             return new WordExtractor(is);
diff --git a/test-data/document/capitalized.doc b/test-data/document/capitalized.doc
new file mode 100644
index 0000000000..00f32a2e41
Binary files /dev/null and b/test-data/document/capitalized.doc differ