source.dussan.org Git - poi.git/commitdiff
[bug-63576] support capitalized text in WordExtractor (HWPF)
author PJ Fanning <fanningpj@apache.org>
Sun, 28 Aug 2022 14:16:01 +0000 (14:16 +0000)
committer PJ Fanning <fanningpj@apache.org>
Sun, 28 Aug 2022 14:16:01 +0000 (14:16 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1903738 13f79535-47bb-0310-9956-ffa450edef68

poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
test-data/document/capitalized.doc [new file with mode: 0644]

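diff --git a/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/poi-scratchpad/src/main/java/org/apache/poi/hwpf/converter/AbstractWordConverter.java
index 01c4bed4fedf69787b7ab3d7dd73ef47bc1fc022..d6f410d286531874915c88f4cd5b7d48d6487c78 100644 (file)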
@@ -52,6 +52,7 @@ import org.apache.poi.hwpf.usermodel.TableRow;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.util.Beta;
 import org.apache.poi.util.Internal;
+import org.apache.poi.util.LocaleUtil;
 import org.apache.poi.util.StringUtil;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
@@ -445,6 +446,10 @@ public abstract class AbstractWordConverter {
                 continue;
             }
 
+            if (characterRun.isCapitalized() || characterRun.isSmallCaps()) {
+                text = text.toUpperCase(LocaleUtil.getUserLocale());
+            }
+
             if (characterRun.isSpecialCharacter()) {
                 if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
                     && (wordDocument instanceof HWPFDocument)) {
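diff --git a/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java b/poi-scratchpad/src/test/java/org/apache/poi/hwpf/extractor/TestWordExtractor.java
index 2e67e0ff0306148e4327b79ff30a9704405034e1..4ef67c44c8c80fd461b8f8f95f544b2e35953605 100644 (file)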
@@ -402,6 +402,14 @@ public final class TestWordExtractor {
         }
     }
 
+    @Test
+    void testCapitalized() throws Exception {
+        try (WordExtractor wExt = openExtractor("capitalized.doc")) {
+            String text = wExt.getText().trim();
+            assertEquals("The following word is: CAPITALIZED.", text);
+        }
+    }
+
     private WordExtractor openExtractor(String fileName) throws IOException {
         try (InputStream is = docTests.openResourceAsStream(fileName)) {
             return new WordExtractor(is);
diff --git a/test-data/document/capitalized.doc b/test-data/document/capitalized.doc
new file mode 100644 (file)
index 0000000..00f32a2
Binary files /dev/null and b/test-data/document/capitalized.doc differ