aboutsummaryrefslogtreecommitdiffstats
path: root/src/ooxml/java/org
diff options
context:
space:
mode:
authorTim Allison <tallison@apache.org>2017-03-08 16:44:40 +0000
committerTim Allison <tallison@apache.org>2017-03-08 16:44:40 +0000
commit7ffc645a5f2bfe750c6f25655d86006aea3e6d7e (patch)
tree13b0387f68fc17b36ef9cc37a118685e42a30963 /src/ooxml/java/org
parent396cd26693fcfdf2450713ca3806a8b2a9bf0258 (diff)
downloadpoi-7ffc645a5f2bfe750c6f25655d86006aea3e6d7e.tar.gz
poi-7ffc645a5f2bfe750c6f25655d86006aea3e6d7e.zip
51519 -- follow on, make concatenation of rPh configurable
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1786021 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/ooxml/java/org')
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java67
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java14
2 files changed, 52 insertions, 29 deletions
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
index 47865a86eb..a482049b6e 100644
--- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
+++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
@@ -78,6 +78,8 @@ import org.xml.sax.helpers.DefaultHandler;
*
*/
public class ReadOnlySharedStringsTable extends DefaultHandler {
+
+ private final boolean includePhoneticRuns;
/**
* An integer representing the total count of strings in the workbook. This count does not
* include any numbers, it counts only the total of text strings in the workbook.
@@ -103,12 +105,29 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
private Map<Integer, String> phoneticStrings;
/**
+ * Calls {@link #ReadOnlySharedStringsTable(OPCPackage, boolean)} with
+ * a value of <code>true</code> for including phonetic runs
+ *
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
* @throws IOException If reading the data from the package fails.
* @throws SAXException if parsing the XML data fails.
*/
public ReadOnlySharedStringsTable(OPCPackage pkg)
throws IOException, SAXException {
+ this(pkg, true);
+ }
+
+ /**
+ *
+ * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
+ * @param includePhoneticRuns whether or not to concatenate phoneticRuns onto the shared string
+ * @since POI 3.14-Beta3
+ * @throws IOException If reading the data from the package fails.
+ * @throws SAXException if parsing the XML data fails.
+ */
+ public ReadOnlySharedStringsTable(OPCPackage pkg, boolean includePhoneticRuns)
+ throws IOException, SAXException {
+ this.includePhoneticRuns = includePhoneticRuns;
ArrayList<PackagePart> parts =
pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
@@ -121,10 +140,24 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
/**
* Like POIXMLDocumentPart constructor
- *
+ *
+ * Calls {@link #ReadOnlySharedStringsTable(PackagePart, boolean)}, with a
+ * value of <code>true</code> to include phonetic runs.
+ *
* @since POI 3.14-Beta1
*/
public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException {
+ this(part, true);
+ }
+
+ /**
+ * Like POIXMLDocumentPart constructor
+ *
+ * @since POI 3.14-Beta3
+ */
+ public ReadOnlySharedStringsTable(PackagePart part, boolean includePhoneticRuns)
+ throws IOException, SAXException {
+ this.includePhoneticRuns = includePhoneticRuns;
readFrom(part.getInputStream());
}
@@ -184,22 +217,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
return strings.get(idx);
}
- /**
- * Return the phonetic string at a given index.
- * Returns <code>null</code> if no phonetic string
- * exists at that index.
- * @param idx
- * @return
- */
- public String getPhoneticStringAt(int idx) {
- //avoid an NPE. If the parser hasn't
- //yet hit <sst/> phoneticStrings could be null
- if (phoneticStrings == null) {
- return null;
- }
- return phoneticStrings.get(idx);
- }
-
public List<String> getItems() {
return strings;
}
@@ -207,7 +224,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
//// ContentHandler methods ////
private StringBuffer characters;
- private StringBuffer rphCharacters;
private boolean tIsOpen;
private boolean inRPh;
@@ -226,13 +242,16 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
this.strings = new ArrayList<String>(this.uniqueCount);
this.phoneticStrings = new HashMap<Integer, String>();
characters = new StringBuffer();
- rphCharacters = new StringBuffer();
} else if ("si".equals(localName)) {
characters.setLength(0);
} else if ("t".equals(localName)) {
tIsOpen = true;
} else if ("rPh".equals(localName)) {
inRPh = true;
+ //append space...this assumes that rPh always comes after regular <t>
+ if (includePhoneticRuns && characters.length() > 0) {
+ characters.append(" ");
+ }
}
}
@@ -244,10 +263,6 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
if ("si".equals(localName)) {
strings.add(characters.toString());
- if (rphCharacters.length() > 0) {
- phoneticStrings.put(strings.size()-1, rphCharacters.toString());
- rphCharacters.setLength(0);
- }
} else if ("t".equals(localName)) {
tIsOpen = false;
} else if ("rPh".equals(localName)) {
@@ -261,9 +276,9 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
public void characters(char[] ch, int start, int length)
throws SAXException {
if (tIsOpen) {
- if (inRPh) {
- rphCharacters.append(ch, start, length);
- } else {
+ if (inRPh && includePhoneticRuns) {
+ characters.append(ch, start, length);
+ } else if (! inRPh){
characters.append(ch, start, length);
}
}
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
index 18db97f433..e49c11c2ea 100644
--- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
@@ -16,6 +16,7 @@
==================================================================== */
package org.apache.poi.xssf.extractor;
+import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
@@ -23,8 +24,6 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
-import javax.xml.parsers.ParserConfigurationException;
-
import org.apache.poi.POIXMLProperties;
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
@@ -64,6 +63,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
private boolean includeCellComments = false;
private boolean includeHeadersFooters = true;
private boolean formulasNotResults = false;
+ private boolean concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(OPCPackage.open(path));
@@ -120,6 +120,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
this.includeCellComments = includeCellComments;
}
+ /**
+ * Concatenate text from &lt;rPh&gt; text elements in SharedStringsTable.
+ * Default is true.
+ * @param concatenatePhoneticRuns whether or not to concatenate phonetic runs onto the shared string
+ */
+ public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
+ this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+ }
public void setLocale(Locale locale) {
this.locale = locale;
}
@@ -189,7 +197,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
*/
public String getText() {
try {
- ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container);
+ ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
XSSFReader xssfReader = new XSSFReader(container);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();