source.dussan.org Git - poi.git/commitdiff
51519 -- follow on, make concatenation of rPh configurable
author: Tim Allison <tallison@apache.org>
Wed, 8 Mar 2017 16:44:40 +0000 (16:44 +0000)
committer: Tim Allison <tallison@apache.org>
Wed, 8 Mar 2017 16:44:40 +0000 (16:44 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1786021 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java

index 47865a86eb8ba1f9c0ea706b4f5da02a19c9f03c..a482049b6ee9ce37c771b6825e02ccf2ebe5fb60 100644 (file)
@@ -78,6 +78,8 @@ import org.xml.sax.helpers.DefaultHandler;
  *
  */
 public class ReadOnlySharedStringsTable extends DefaultHandler {
+
+    private final boolean includePhoneticRuns;
     /**
      * An integer representing the total count of strings in the workbook. This count does not
      * include any numbers, it counts only the total of text strings in the workbook.
@@ -103,12 +105,29 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
     private Map<Integer, String> phoneticStrings;
 
     /**
+     * Calls {@link #ReadOnlySharedStringsTable(OPCPackage, boolean)} with
+     * a value of <code>true</code> for including phonetic runs
+     *
      * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
      * @throws IOException If reading the data from the package fails.
      * @throws SAXException if parsing the XML data fails.
      */
     public ReadOnlySharedStringsTable(OPCPackage pkg)
             throws IOException, SAXException {
+        this(pkg, true);
+    }
+
+    /**
+     *
+     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
+     * @param includePhoneticRuns whether or not to concatenate phoneticRuns onto the shared string
+     * @since POI 3.14-Beta3
+     * @throws IOException If reading the data from the package fails.
+     * @throws SAXException if parsing the XML data fails.
+     */
+    public ReadOnlySharedStringsTable(OPCPackage pkg, boolean includePhoneticRuns)
+            throws IOException, SAXException {
+        this.includePhoneticRuns = includePhoneticRuns;
         ArrayList<PackagePart> parts =
                 pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
 
@@ -121,10 +140,24 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
 
     /**
      * Like POIXMLDocumentPart constructor
-     * 
+     *
+     * Calls {@link #ReadOnlySharedStringsTable(PackagePart, boolean)}, with a
+     * value of <code>true</code> to include phonetic runs.
+     *
      * @since POI 3.14-Beta1
      */
     public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException {
+        this(part, true);
+    }
+
+    /**
+     * Like POIXMLDocumentPart constructor
+     *
+     * @since POI 3.14-Beta3
+     */
+    public ReadOnlySharedStringsTable(PackagePart part, boolean includePhoneticRuns)
+        throws IOException, SAXException {
+        this.includePhoneticRuns = includePhoneticRuns;
         readFrom(part.getInputStream());
     }
     
@@ -184,22 +217,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
         return strings.get(idx);
     }
 
-    /**
-     * Return the phonetic string at a given index.
-     * Returns <code>null</code> if no phonetic string
-     * exists at that index.
-     * @param idx
-     * @return
-     */
-    public String getPhoneticStringAt(int idx) {
-        //avoid an NPE.  If the parser hasn't
-        //yet hit <sst/> phoneticStrings could be null
-        if (phoneticStrings == null) {
-            return null;
-        }
-        return phoneticStrings.get(idx);
-    }
-
     public List<String> getItems() {
         return strings;
     }
@@ -207,7 +224,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
     //// ContentHandler methods ////
 
     private StringBuffer characters;
-    private StringBuffer rphCharacters;
     private boolean tIsOpen;
     private boolean inRPh;
 
@@ -226,13 +242,16 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
             this.strings = new ArrayList<String>(this.uniqueCount);
             this.phoneticStrings = new HashMap<Integer, String>();
             characters = new StringBuffer();
-            rphCharacters = new StringBuffer();
         } else if ("si".equals(localName)) {
             characters.setLength(0);
         } else if ("t".equals(localName)) {
             tIsOpen = true;
         } else if ("rPh".equals(localName)) {
             inRPh = true;
+            //append space...this assumes that rPh always comes after regular <t>
+            if (includePhoneticRuns && characters.length() > 0) {
+                characters.append(" ");
+            }
         }
     }
 
@@ -244,10 +263,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
 
         if ("si".equals(localName)) {
             strings.add(characters.toString());
-            if (rphCharacters.length() > 0) {
-                phoneticStrings.put(strings.size()-1, rphCharacters.toString());
-                rphCharacters.setLength(0);
-            }
         } else if ("t".equals(localName)) {
             tIsOpen = false;
         } else if ("rPh".equals(localName)) {
@@ -261,9 +276,9 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
     public void characters(char[] ch, int start, int length)
             throws SAXException {
         if (tIsOpen) {
-            if (inRPh) {
-                rphCharacters.append(ch, start, length);
-            } else {
+            if (inRPh && includePhoneticRuns) {
+                characters.append(ch, start, length);
+            } else if (! inRPh){
                 characters.append(ch, start, length);
             }
         }
index 18db97f433f37ae5d48c50343adecfc429be3118..e49c11c2ead7dc166c779455d40d5be56a18b36f 100644 (file)
@@ -16,6 +16,7 @@
 ==================================================================== */
 package org.apache.poi.xssf.extractor;
 
+import javax.xml.parsers.ParserConfigurationException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashMap;
@@ -23,8 +24,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import javax.xml.parsers.ParserConfigurationException;
-
 import org.apache.poi.POIXMLProperties;
 import org.apache.poi.POIXMLProperties.CoreProperties;
 import org.apache.poi.POIXMLProperties.CustomProperties;
@@ -64,6 +63,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     private boolean includeCellComments = false;
     private boolean includeHeadersFooters = true;
     private boolean formulasNotResults = false;
+    private boolean concatenatePhoneticRuns = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
         this(OPCPackage.open(path));
@@ -120,6 +120,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
         this.includeCellComments = includeCellComments;
     }
 
+    /**
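+     * Sets whether text from &lt;rPh&gt; (phonetic run) elements in the SharedStringsTable
+     * is concatenated onto the shared string. Default is <code>true</code>.
+     * @param concatenatePhoneticRuns <code>true</code> to append phonetic run text, <code>false</code> to omit it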
+     */
+    public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
+        this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+    }
     public void setLocale(Locale locale) {
         this.locale = locale;
     }
@@ -189,7 +197,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
     */
    public String getText() {
        try {
-          ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container);
+          ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
           XSSFReader xssfReader = new XSSFReader(container);
           StylesTable styles = xssfReader.getStylesTable();
           XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
index 060c5a80dd5197fd1d21d4879a6b131da2dce689..4b5e62cc5c0367c5643d6709027b34be7c931312 100644 (file)
@@ -59,19 +59,27 @@ public final class TestReadOnlySharedStringsTable extends TestCase {
 
        }
 
+       //51519
        public void testPhoneticRuns() throws Exception {
         OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx"));
         List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml"));
         assertEquals(1, parts.size());
 
-        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0));
+        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0), true);
         List<String> strings = rtbl.getItems();
         assertEquals(49, strings.size());
 
         assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
-        assertNull(rtbl.getPhoneticStringAt(0));
+        assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3", rtbl.getEntryAt(3));
+
+        //now do not include phonetic runs
+        rtbl = new ReadOnlySharedStringsTable(parts.get(0), false);
+        strings = rtbl.getItems();
+        assertEquals(49, strings.size());
+
+        assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
         assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
-        assertEquals("\u30CB\u30DB\u30F3", rtbl.getPhoneticStringAt(3));
+
     }
 
     public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {
index 73d22aeebd9cae46bba641508824b4d853229a85..b6d411d508a02f3f4eb82e045067bae0ead775c7 100644 (file)
@@ -18,6 +18,7 @@
 package org.apache.poi.xssf.extractor;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
@@ -359,4 +360,25 @@ public class TestXSSFEventBasedExcelExtractor {
                assertTrue("can't find 10/02/2016", text.contains("10/02/2016"));
                ex.close();
        }
+
+       @Test
+       public void test51519() throws Exception {
+       //default behavior: include phonetic runs
+               XSSFEventBasedExcelExtractor ex =
+                               new XSSFEventBasedExcelExtractor(
+                                               XSSFTestDataSamples.openSamplePackage("51519.xlsx"));
+               String text = ex.getText();
+               assertTrue("can't find appended phonetic run", text.contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3"));
+               ex.close();
+
+               //now try turning them off
+               ex =
+                               new XSSFEventBasedExcelExtractor(
+                                               XSSFTestDataSamples.openSamplePackage("51519.xlsx"));
+               ex.setConcatenatePhoneticRuns(false);
+               text = ex.getText();
+               assertFalse("should not be able to find appended phonetic run", text.contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3"));
+               ex.close();
+
+       }
 }