+ /** Relationship type URI naming the sharedStrings relationship */
public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
private WorkbookDocument workbookDoc;
-
+ /** Shared strings table that getSharedString(int) reads from */
private SharedStringsTable sharedStrings;
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
return sheetDoc.getWorksheet();
}
-
+
+ /**
+ * Returns the shared string at the given index.
+ *
+ * @param index zero-based position in the shared strings table
+ * @return the string held by the table at that position
+ */
public String getSharedString(int index) {
return this.sharedStrings.get(index);
}
+ /**
+ * Exposes the underlying shared strings table, so that
+ * subclasses and tests can inspect or modify it directly.
+ *
+ * @return the table itself, not a copy
+ */
+ protected SharedStringsTable _getSharedStringsTable() {
+ return this.sharedStrings;
+ }
}
package org.apache.poi.hssf.model;
import java.io.IOException;
-import java.io.InputStream;
+import java.io.OutputStream;
import java.util.LinkedList;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
+import org.apache.xmlbeans.XmlException;
import org.openxml4j.opc.PackagePart;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import org.xml.sax.SAXException;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
+/**
+ * Table of shared strings read from, and written back to, a package part.
+ * Strings are held in list order, so a list index is the shared string index.
+ */
public class SharedStringsTable extends LinkedList<String> {
-
- private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
-
- /** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily.
- CTSst sst;
- */
-
+ /** Namespace URI of the main SpreadsheetML schema */
+ public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
+
+ /** Parsed XMLBeans document backing this table; its entries are rebuilt by write() */
+ private SstDocument doc;
+ /** Package part the table was parsed from and is saved back to */
private PackagePart part;
- private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
-
- private DocumentBuilder parser;
-
- public SharedStringsTable(PackagePart part) throws IOException {
+ /**
+ * Loads the shared strings held in the given package part.
+ *
+ * @param part the package part containing the shared strings XML
+ * @throws IOException if the part's stream cannot be read or closed
+ * @throws XmlException if the part's content cannot be parsed
+ */
+ public SharedStringsTable(PackagePart part) throws IOException, XmlException {
this.part = part;
- InputStream is = part.getInputStream();
- try {
- builderFactory.setNamespaceAware(true);
- this.parser = builderFactory.newDocumentBuilder();
- readFrom(is);
- } catch (ParserConfigurationException e) {
- throw new RuntimeException(e);
- } catch (SAXException e) {
- throw new RuntimeException(e);
- } finally {
- if (is != null) is.close();
- }
-
-
+ // Fully qualified, as the java.io.InputStream import is dropped by this change
+ java.io.InputStream is = part.getInputStream();
+ try {
+ doc = SstDocument.Factory.parse(is);
+ } finally {
+ // Release the part's stream whether or not parsing succeeded,
+ // as the DOM based code this replaces used to do
+ if (is != null) {
+ is.close();
+ }
+ }
+ read();
}
- public void readFrom(InputStream is) throws IOException, SAXException {
- Document doc = parser.parse(is);
- Element root = doc.getDocumentElement();
- NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si");
- for (int i = 0 ; i < sis.getLength() ; ++i) {
- Element si = (Element) sis.item(i);
- NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t");
- String t = "";
- if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) {
- t = ts.item(0).getFirstChild().getNodeValue();
- add(t);
- }
- }
+ /**
+ * Fills this list from the parsed document, adding one element
+ * per si entry in document order, so that a list index is the
+ * same as the entry's index in the file.
+ * NOTE(review): getT() presumably yields null for entries with no
+ * plain text element (eg rich text) - those are still added, as
+ * null, which keeps later indexes aligned. Confirm this is wanted.
+ */
+ private void read() {
+ for (CTRst si : doc.getSst().getSiArray()) {
+ add(si.getT());
+ }
+ }
+
+ /**
+ * Writes the current shared strings table into
+ * the associated OOXML PackagePart.
+ * All existing si entries of the backing document are dropped
+ * and recreated from this list, in list order.
+ * NOTE(review): count and uniqueCount are both set to the list
+ * size; check against the spec whether count should instead be
+ * the number of cell references to shared strings.
+ *
+ * @throws IOException if the part's output stream cannot be
+ * opened, written to or closed
+ */
+ public void write() throws IOException {
+ CTSst sst = doc.getSst();
+
+ // Remove the old list, back to front so the
+ // indexes still to be removed stay valid
+ for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
+ sst.removeSi(i);
+ }
+
+ // Add the new one
+ for(String s : this) {
+ sst.addNewSi().setT(s);
+ }
+
+ // Update the counts
+ sst.setCount(this.size());
+ sst.setUniqueCount(this.size());
+
+ // Write out, making sure the stream is released
+ // even if saving the document fails part way
+ OutputStream out = part.getOutputStream();
+ try {
+ doc.save(out);
+ } finally {
+ out.close();
+ }
}
}
switch (cell.getT().intValue()) {
case STCellType.INT_S:
return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
+ case STCellType.INT_INLINE_STR:
+ return cell.getV();
case STCellType.INT_N:
return cell.getV();
// TODO: support other types
import java.io.File;
+import org.apache.poi.hssf.model.SharedStringsTable;
import org.apache.poi.hxf.HXFDocument;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
}
+
+ /**
+ * Checks that the shared strings of the sample file are loaded,
+ * and that changes made to the table survive a write and reload.
+ */
+ public void testSharedStringBasics() throws Exception {
+ HSSFXML workbook = new HSSFXML(HXFDocument.openPackage(sampleFile));
+ SharedStringsTable strings = workbook._getSharedStringsTable();
+ assertNotNull(strings);
+
+ assertEquals(10, strings.size());
+ assertEquals("Lorem", strings.get(0));
+
+ // Each entry must also be reachable through the workbook
+ int idx = 0;
+ for(String expected : strings) {
+ assertEquals(expected, workbook.getSharedString(idx++));
+ }
+
+ // Add a few more, then save and reload, checking
+ // changes have been kept
+ strings.add("Foo");
+ strings.add("Bar");
+ strings.set(0, "LoremLorem");
+ strings.write();
+
+ workbook = new HSSFXML(workbook.getPackage());
+ strings = workbook._getSharedStringsTable();
+ assertEquals(12, strings.size());
+ assertEquals("LoremLorem", strings.get(0));
+
+ idx = 0;
+ for(String expected : strings) {
+ assertEquals(expected, workbook.getSharedString(idx++));
+ }
+ }
}
\ No newline at end of file
extractor.setIncludeSheetNames(false);
text = extractor.getText();
assertEquals(
- "0\t111\n" +
- "1\t222\n" +
- "2\t333\n" +
- "3\t444\n" +
- "4\t555\n" +
- "5\t666\n" +
- "6\t777\n" +
- "7\t888\n" +
- "8\t999\n" +
- "9\t4995\n" +
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\t4995\n" +
"\n\n", text);
// Now get formulas not their values
extractor.setFormulasNotResults(true);
text = extractor.getText();
assertEquals(
- "0\t111\n" +
- "1\t222\n" +
- "2\t333\n" +
- "3\t444\n" +
- "4\t555\n" +
- "5\t666\n" +
- "6\t777\n" +
- "7\t888\n" +
- "8\t999\n" +
- "9\tSUM(B1:B9)\n" +
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\tSUM(B1:B9)\n" +
"\n\n", text);
// With sheet names too
text = extractor.getText();
assertEquals(
"Sheet1\n" +
- "0\t111\n" +
- "1\t222\n" +
- "2\t333\n" +
- "3\t444\n" +
- "4\t555\n" +
- "5\t666\n" +
- "6\t777\n" +
- "7\t888\n" +
- "8\t999\n" +
- "9\tSUM(B1:B9)\n\n" +
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\tSUM(B1:B9)\n\n" +
"Sheet2\n\n" +
"Sheet3\n"
, text);
assertTrue(text.length() > 0);
// Might not have all formatting it should do!
+ // TODO decide if we should really have the "null" in there
assertTrue(text.startsWith(
"Avgtxfull\n" +
- "3\t13\t3\t2\t2\t3\t2\t"
+ "null\t(iii) AVERAGE TAX RATES ON ANNUAL"
));
}
POITextExtractor extractor = extractors[i];
String text = extractor.getText().replaceAll("[\r\t]", "");
- System.out.println(text.length());
- System.out.println(text);
+ //System.out.println(text.length());
+ //System.out.println(text);
assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
Matcher m = pattern.matcher(text);