import java.io.IOException;
+import org.apache.poi.hssf.model.SharedStringsTable;
import org.apache.poi.hxf.HXFDocument;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
+ public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
private WorkbookDocument workbookDoc;
+ private SharedStringsTable sharedStrings;
+
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
super(container, MAIN_CONTENT_TYPE);
workbookDoc =
WorkbookDocument.Factory.parse(basePart.getInputStream());
+
+ PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart);
+ if (ssPart != null) {
+ sharedStrings = new SharedStringsTable(ssPart);
+ } else {
+
+ }
}
/**
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
return sheetDoc.getWorksheet();
}
+
+ public String getSharedString(int index) {
+ return this.sharedStrings.get(index);
+ }
}
}
}
if(!done) {
- HSSFXMLCell uCell = new HSSFXMLCell(cell);
+ HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
text.append(uCell.getStringValue());
}
}
package org.apache.poi.hssf.usermodel;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
/**
* User facing wrapper around an underlying cell object
*/
public class HSSFXMLCell {
- private CTCell cell;
- public HSSFXMLCell(CTCell rawCell) {
- this.cell = rawCell;
- }
-
- /**
- * Formats the cell's contents, based on its type,
- * and returns it as a string.
- */
- public String getStringValue() {
- if(cell.getV() != null) {
- return cell.getV();
- }
- if(cell.getIs() != null) {
- return cell.getIs().getT();
- }
- // TODO: Formatting
- return Long.toString(cell.getS());
- }
-
- public String toString() {
- return cell.getR() + " - " + getStringValue();
- }
+ private CTCell cell;
+
+ /** The workbook to which this cell belongs */
+ private final HSSFXMLWorkbook workbook;
+
+ public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) {
+ this.cell = rawCell;
+ this.workbook = workbook;
+ }
+
+ /**
+ * Formats the cell's contents, based on its type,
+ * and returns it as a string.
+ */
+ public String getStringValue() {
+
+ switch (cell.getT().intValue()) {
+ case STCellType.INT_S:
+ return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
+ case STCellType.INT_N:
+ return cell.getV();
+ // TODO: support other types
+ default:
+ return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'";
+ }
+ }
+
+ public String toString() {
+ return cell.getR() + " - " + getStringValue();
+ }
}
public HSSFXML _getHSSFXML() {
return hssfXML;
}
+
+ public String getSharedString(int index) {
+ return hssfXML.getSharedString(index);
+ }
}
return parts.get(0);
}
+ /**
+ * Fetches the (single) PackagePart which is defined as
+ * the supplied relation content type of the specified part,
+ * or null if none found.
+ * @param relationType The relation content type to search for
+ * @throws IllegalArgumentException If we find more than one part of that type
+ * TODO: this sucks! Make Package and PackagePart implement common intf that defines getRelationshipsByType & friends
+ */
+ protected PackagePart getSinglePartByRelationType(String relationType, PackagePart part) throws IllegalArgumentException, OpenXML4JException {
+ PackageRelationshipCollection rels =
+ part.getRelationshipsByType(relationType);
+ if(rels.size() == 0) {
+ return null;
+ }
+ if(rels.size() > 1) {
+ throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
+ }
+ PackageRelationship rel = rels.getRelationship(0);
+ return getPackagePart(rel);
+ }
+
/**
* Fetches the (single) PackagePart which is defined as
* the supplied relation content type of the base
* @param relationType The relation content type to search for
* @throws IllegalArgumentException If we find more than one part of that type
*/
- private PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
+ protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
PackageRelationshipCollection rels =
container.getRelationshipsByType(relationType);
if(rels.size() == 0) {
import java.io.File;
import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import junit.framework.TestCase;
* ExcelExtractor does, when we're both passed
* the same file, just saved as xls and xlsx
*/
- public void BROKENtestComparedToOLE2() throws Exception {
+ public void testComparedToOLE2() throws Exception {
HXFExcelExtractor ooxmlExtractor =
new HXFExcelExtractor(simpleXLSX.getPackage());
ExcelExtractor ole2Extractor =
for (int i = 0; i < extractors.length; i++) {
POITextExtractor extractor = extractors[i];
- String text = extractor.getText().replace("\r", "");
+ String text = extractor.getText().replaceAll("[\r\t]", "");
System.out.println(text.length());
System.out.println(text);
- assertTrue(text.startsWith("First Sheet\nTest spreadsheet\t\n2nd row\t2nd row 2nd column\n"));
- assertTrue(text.endsWith("13.0\nSheet3\n"));
-
- assertTrue(text.length() >= 214);
- assertTrue(text.length() <= 214);
+ assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+ Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
+ Matcher m = pattern.matcher(text);
+ assertTrue(m.matches());
}
}
}