diff options
author | Nick Burch <nick@apache.org> | 2010-01-26 11:10:48 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2010-01-26 11:10:48 +0000 |
commit | 675b6ad97661a30f696ee22774f7e50bb5a0ea63 (patch) | |
tree | 0ce71c28eacdc7c4f46b48503c5018d819300b9d /src/ooxml | |
parent | 4c1c3a3ae3a5a86d8fbe42cb1e89afa18c1a8ce1 (diff) | |
download | poi-675b6ad97661a30f696ee22774f7e50bb5a0ea63.tar.gz poi-675b6ad97661a30f696ee22774f7e50bb5a0ea63.zip |
The ReadOnlySharedStringsTable is re-usable for other event-based things, so copy it out to its own class
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@903170 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/ooxml')
-rw-r--r-- | src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java | 221 |
1 files changed, 221 insertions, 0 deletions
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java new file mode 100644 index 0000000000..52e2c258ae --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java @@ -0,0 +1,221 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
package org.apache.poi.xssf.eventusermodel;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * <p>This is a lightweight way to process the Shared Strings
 *  table. Most of the text cells will reference something
 *  from in here.
 * <p>Note that each SI entry can have multiple T elements, if the
 *  string is made up of bits with different formatting. The text of
 *  every T element found inside an SI entry is concatenated, in
 *  document order, to form that entry's string.
 * <p>Example input:
 * <pre>
&lt;?xml version="1.0" encoding="UTF-8" standalone="yes" ?&gt;
&lt;sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2"&gt;
 &lt;si&gt;
   &lt;r&gt;
     &lt;rPr&gt;
       &lt;b /&gt;
       &lt;sz val="11" /&gt;
       &lt;color theme="1" /&gt;
       &lt;rFont val="Calibri" /&gt;
       &lt;family val="2" /&gt;
       &lt;scheme val="minor" /&gt;
     &lt;/rPr&gt;
     &lt;t&gt;This:&lt;/t&gt;
   &lt;/r&gt;
   &lt;r&gt;
     &lt;rPr&gt;
       &lt;sz val="11" /&gt;
       &lt;color theme="1" /&gt;
       &lt;rFont val="Calibri" /&gt;
       &lt;family val="2" /&gt;
       &lt;scheme val="minor" /&gt;
     &lt;/rPr&gt;
     &lt;t xml:space="preserve"&gt;Causes Problems&lt;/t&gt;
   &lt;/r&gt;
 &lt;/si&gt;
 &lt;si&gt;
   &lt;t&gt;This does not&lt;/t&gt;
 &lt;/si&gt;
&lt;/sst&gt;
 * </pre>
 */
public class ReadOnlySharedStringsTable extends DefaultHandler {
    /**
     * Resolves every external entity (including an external DTD) to empty
     * content, so that parsing an untrusted workbook can never make the
     * parser fetch local files or remote URLs.
     */
    private static final EntityResolver IGNORING_ENTITY_RESOLVER = new EntityResolver() {
        public InputSource resolveEntity(String publicId, String systemId) {
            return new InputSource(new StringReader(""));
        }
    };

    /**
     * An integer representing the total count of strings in the workbook. This count does not
     * include any numbers, it counts only the total of text strings in the workbook.
     * Taken verbatim from the <code>count</code> attribute; 0 if the attribute is absent.
     */
    private int count;

    /**
     * An integer representing the total count of unique strings in the Shared String Table.
     * A string is unique even if it is a copy of another string, but has different formatting applied
     * at the character level.
     * Taken verbatim from the <code>uniqueCount</code> attribute; 0 if the attribute is absent.
     */
    private int uniqueCount;

    /**
     * The shared strings table, one entry per SI element, in document order.
     * A growable list is used rather than an array sized from
     * <code>uniqueCount</code>, because that attribute is optional and is
     * not guaranteed to match the real number of entries.
     */
    private final List<String> strings = new ArrayList<String>();

    /**
     * Loads the shared strings table of the given package, if it has one.
     * Only the first part with the shared strings content type is read;
     * if there is none, the table is left empty.
     *
     * @param pkg the package to look the shared strings part up in
     * @throws IOException if the part cannot be opened or read
     * @throws SAXException if the part is not well-formed XML, or its
     *  count attributes are not numeric
     */
    public ReadOnlySharedStringsTable(OPCPackage pkg)
            throws IOException, SAXException {
        List<PackagePart> parts =
            pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());

        // Some workbooks have no shared strings table.
        if (!parts.isEmpty()) {
            readAndClose(parts.get(0).getInputStream());
        }
    }

    /**
     * Like POIXMLDocumentPart constructor
     *
     * @param part the shared strings part to read
     * @param rel_ignored not used; present only to mirror the
     *  POIXMLDocumentPart constructor signature
     * @throws IOException if the part cannot be opened or read
     * @throws SAXException if the part is not well-formed XML, or its
     *  count attributes are not numeric
     */
    public ReadOnlySharedStringsTable(PackagePart part, PackageRelationship rel_ignored)
            throws IOException, SAXException {
        readAndClose(part.getInputStream());
    }

    /**
     * Parses a stream that this class opened itself, and always closes it
     * afterwards so the underlying part is not leaked.
     */
    private void readAndClose(InputStream is) throws IOException, SAXException {
        try {
            readFrom(is);
        } finally {
            is.close();
        }
    }

    /**
     * Read this shared strings table from an XML file. Any entries held
     * from a previous read are discarded once the new document's
     * <code>sst</code> element is seen.
     *
     * @param is The input stream containing the XML document.
     * @throws IOException if an error occurs while reading.
     * @throws SAXException if the document is not well-formed, or its
     *  count attributes are not numeric
     * @throws RuntimeException if no SAX parser can be configured
     */
    public void readFrom(InputStream is) throws IOException, SAXException {
        InputSource sheetSource = new InputSource(is);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        // Namespace awareness lets the handler match on local names, so
        //  documents that use a prefix (eg <x:sst>) are read correctly.
        saxFactory.setNamespaceAware(true);
        try {
            // The document comes from a user supplied file: enable the
            //  parser's built-in limits and never resolve external entities.
            saxFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader sheetParser = saxParser.getXMLReader();
            sheetParser.setEntityResolver(IGNORING_ENTITY_RESOLVER);
            sheetParser.setContentHandler(this);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            // Keep the original exception as the cause so it can be diagnosed
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        }
    }

    /**
     * Return an integer representing the total count of strings in the workbook. This count does not
     * include any numbers, it counts only the total of text strings in the workbook.
     *
     * @return the total count of strings in the workbook, as declared by
     *  the file, or 0 if the file does not declare it
     */
    public int getCount() {
        return this.count;
    }

    /**
     * Returns an integer representing the total count of unique strings in the Shared String Table.
     * A string is unique even if it is a copy of another string, but has different formatting applied
     * at the character level.
     *
     * @return the total count of unique strings in the workbook, as
     *  declared by the file, or 0 if the file does not declare it
     */
    public int getUniqueCount() {
        return this.uniqueCount;
    }

    /**
     * Return the string at a given index.
     * Formatting is ignored.
     *
     * @param idx index of item to return.
     * @return the item at the specified position in this Shared String table.
     * @throws IndexOutOfBoundsException if no entry was read for that index
     */
    public String getEntryAt(int idx) {
        return strings.get(idx);
    }

    //// ContentHandler methods ////

    /** Text collected so far for the SI entry currently being read. */
    private final StringBuilder characters = new StringBuilder();
    /** Whether the parser is currently inside a T element. */
    private boolean tIsOpen;

    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        String element = elementName(localName, name);
        if ("sst".equals(element)) {
            // Both attributes are optional, so neither is used for sizing
            this.count = parseCount(attributes, "count");
            this.uniqueCount = parseCount(attributes, "uniqueCount");
            strings.clear();
            characters.setLength(0);
        } else if ("si".equals(element)) {
            characters.setLength(0);
        } else if ("t".equals(element)) {
            tIsOpen = true;
        }
    }

    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        String element = elementName(localName, name);
        if ("si".equals(element)) {
            strings.add(characters.toString());
        } else if ("t".equals(element)) {
            tIsOpen = false;
        }
    }

    /**
     * Captures characters only if a t(ext) element is open.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (tIsOpen) {
            characters.append(ch, start, length);
        }
    }

    /**
     * Picks the name to match an element on: the local name when the
     * parser supplies one, otherwise the raw qualified name.
     */
    private static String elementName(String localName, String qName) {
        if (localName != null && localName.length() > 0) {
            return localName;
        }
        return qName;
    }

    /**
     * Reads an optional integer attribute of the sst element.
     *
     * @return the parsed value, or 0 if the attribute is absent
     * @throws SAXException if the attribute is present but not an integer
     */
    private static int parseCount(Attributes attributes, String attrName)
            throws SAXException {
        // Namespace-style lookup first, qualified-name lookup as a fallback
        //  for parsers that only report one of the two forms.
        String raw = attributes.getValue("", attrName);
        if (raw == null) {
            raw = attributes.getValue(attrName);
        }
        if (raw == null) {
            return 0;
        }
        try {
            return Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new SAXException(
                    "Invalid '" + attrName + "' attribute on sst element: " + raw, e);
        }
    }
}