<changes>
<release version="3.8-beta1" date="2010-??-??">
+ <action dev="poi-developers" type="add">50076 - Allow access from XSSFReader to sheet comments and headers/footers</action>
<action dev="poi-developers" type="add">50076 - Refactor XSSFEventBasedExcelExtractor to make it easier for you to have control over outputting the cell contents</action>
<action dev="poi-developers" type="fix">50258 - avoid corruption of XSSFWorkbook after applying XSSFRichTextRun#applyFont</action>
<action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action>
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
+import org.apache.poi.openxml4j.opc.OPCPackage;
public abstract class POIXMLTextExtractor extends POITextExtractor {
/** The POIXMLDocument that's open */
/**
* Returns opened document
*/
- public final POIXMLDocument getDocument(){
+ public final POIXMLDocument getDocument() {
return _document;
}
+ /**
+ * Returns the opened OPCPackage that contains the document.
+ * <p>
+ * This simply delegates to the wrapped document's <tt>getPackage()</tt>.
+ * NOTE(review): the wrapped document is dereferenced without a null
+ * check; extractors created without a document presumably need to
+ * override this method - confirm.
+ *
+ * @return the package reported by the wrapped document
+ */
+ public OPCPackage getPackage() {
+ return _document.getPackage();
+ }
/**
* Returns an OOXML properties text extractor for the
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
* Current CTSheet bean
*/
private CTSheet ctSheet;
-
+
/**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
public String getSheetName() {
return ctSheet.getName();
}
+
+ /**
+  * Looks up the comments table attached to the current sheet.
+  * <p>
+  * The sheet's part is queried for a comments relationship; only the
+  * first such relationship is used. Its target is resolved to a part
+  * in the same package and wrapped in a new CommentsTable.
+  *
+  * @return the comments for this sheet, or null if the sheet has no
+  *  comments relationship, or if resolving / reading the comments
+  *  part raised an InvalidFormatException or IOException
+  */
+ public CommentsTable getSheetComments() {
+     PackagePart sheetPart = getSheetPart();
+
+     try {
+         PackageRelationshipCollection commentRels =
+             sheetPart.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+         // No comments relationship: nothing to return
+         if(commentRels.size() < 1) {
+             return null;
+         }
+
+         // Only ever one comments relationship, so take the first
+         PackageRelationship commentsRel = commentRels.getRelationship(0);
+         PackagePartName targetName = PackagingURIHelper.createPartName(commentsRel.getTargetURI());
+         PackagePart targetPart = sheetPart.getPackage().getPart(targetName);
+         return new CommentsTable(targetPart, commentsRel);
+     } catch (InvalidFormatException e) {
+         return null;
+     } catch (IOException e) {
+         return null;
+     }
+ }
+
+ /**
+ * Returns the package part for the current sheet.
+ * <p>
+ * The id of the current CTSheet bean is used as the key into the
+ * sheet map.
+ *
+ * @return whatever the sheet map holds for the current sheet's id
+ */
+ public PackagePart getSheetPart() {
+ String sheetId = ctSheet.getId();
+ return sheetMap.get(sheetId);
+ }
+ /**
+ * We're read only, so remove isn't supported.
+ *
+ * @throws IllegalStateException always, with the message "Not supported"
+ */
public void remove() {
throw new IllegalStateException("Not supported");
}
private boolean vIsOpen;
// Set when F start element is seen
private boolean fIsOpen;
+ // Set when a header/footer element is seen
+ private boolean hfIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
// Gathers characters as they are seen.
private StringBuffer value = new StringBuffer();
private StringBuffer formula = new StringBuffer();
+ private StringBuffer headerFooter = new StringBuffer();
/**
* Accepts objects needed while parsing.
*
* @param styles Table of styles
* @param strings Table of shared strings
- * @param cols Minimum number of columns to show
- * @param target Sink for output
+ * @param sheetContentsHandler Callback kept as this handler's output;
+ *  it is told about rows and header/footer text as they are parsed
+ * @param dataFormatter Formatter kept as this handler's formatter
+ * @param formulasNotResults Flag stored on the handler; presumably selects
+ *  formula text instead of cached results - confirm against the cell handling
*/
public XSSFSheetXMLHandler(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SheetContentsHandler sheetContentsHandler,
+ DataFormatter dataFormatter,
boolean formulasNotResults) {
this.stylesTable = styles;
this.sharedStringsTable = strings;
this.output = sheetContentsHandler;
this.formulasNotResults = formulasNotResults;
this.nextDataType = xssfDataType.NUMBER;
- this.formatter = new DataFormatter();
+ this.formatter = dataFormatter;
+ }
+ /**
+ * Accepts objects needed while parsing.
+ * <p>
+ * Convenience form: delegates to the constructor that takes a
+ * DataFormatter, passing one created with the no-argument
+ * DataFormatter constructor.
+ *
+ * @param styles Table of styles
+ * @param strings Table of shared strings
+ * @param sheetContentsHandler Callback passed through as the handler's output
+ * @param formulasNotResults Flag passed through unchanged; presumably selects
+ *  formula text instead of cached results - confirm against the cell handling
+ */
+ public XSSFSheetXMLHandler(
+ StylesTable styles,
+ ReadOnlySharedStringsTable strings,
+ SheetContentsHandler sheetContentsHandler,
+ boolean formulasNotResults) {
+ this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
}
public void startElement(String uri, String localName, String name,
fIsOpen = true;
}
}
+ else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
+ "firstHeader".equals(name) || "firstFooter".equals(name) ||
+ "oddFooter".equals(name) || "evenFooter".equals(name)) {
+ hfIsOpen = true;
+ // Clear contents cache
+ headerFooter.setLength(0);
+ }
else if("row".equals(name)) {
int rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
output.startRow(rowNum);
} else if ("row".equals(name)) {
output.endRow();
}
+ else if("oddHeader".equals(name) || "evenHeader".equals(name) ||
+ "firstHeader".equals(name)) {
+ hfIsOpen = false;
+ output.headerFooter(headerFooter.toString(), true, name);
+ }
+ else if("oddFooter".equals(name) || "evenFooter".equals(name) ||
+ "firstFooter".equals(name)) {
+ hfIsOpen = false;
+ output.headerFooter(headerFooter.toString(), false, name);
+ }
}
/**
if (fIsOpen) {
formula.append(ch, start, length);
}
+ if (hfIsOpen) {
+ headerFooter.append(ch, start, length);
+ }
}
/**
public void endRow();
/** A cell, with the given formatted value, was encountered */
public void cell(String cellReference, String formattedValue);
+ /**
+ * A header or footer has been encountered.
+ *
+ * @param text the character content gathered for the element
+ * @param isHeader true when the element is a header, false when it is a footer
+ * @param tagName name of the element that was closed, for example
+ *  "oddHeader" or "firstFooter"
+ */
+ public void headerFooter(String text, boolean isHeader, String tagName);
}
}
import java.io.IOException;
import java.io.InputStream;
+import java.util.Locale;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
+import org.apache.poi.POIXMLProperties;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.POIXMLProperties.CoreProperties;
+import org.apache.poi.POIXMLProperties.CustomProperties;
+import org.apache.poi.POIXMLProperties.ExtendedProperties;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
private OPCPackage container;
+ private POIXMLProperties properties;
+
+ private Locale locale;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ /**
+ * Creates an extractor working directly on the given package.
+ * <p>
+ * No document is handed to the superclass (it receives null); the
+ * package is kept in this class and a POIXMLProperties is created
+ * from it.
+ *
+ * @param container the OPC package to extract from
+ */
public XSSFEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
super(null);
this.container = container;
+
+ properties = new POIXMLProperties(container);
}
public static void main(String[] args) throws Exception {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Sets the locale used to build the DataFormatter when a sheet is
+ * processed. While it is null, a DataFormatter created without an
+ * explicit locale is used instead.
+ *
+ * @param locale the locale to format with, or null
+ */
+ public void setLocale(Locale locale) {
+ this.locale = locale;
+ }
+
+ /**
+ * Returns the opened OPCPackage container.
+ * <p>
+ * This is the package given to the constructor; it is returned
+ * directly rather than being looked up through a document.
+ *
+ * @return the package this extractor was created with
+ */
+ @Override
+ public OPCPackage getPackage() {
+ return container;
+ }
+
+ /**
+ * Returns the core document properties
+ *
+ * @return the core properties held by this extractor's POIXMLProperties
+ */
+ @Override
+ public CoreProperties getCoreProperties() {
+ return properties.getCoreProperties();
+ }
+ /**
+ * Returns the extended document properties
+ *
+ * @return the extended properties held by this extractor's POIXMLProperties
+ */
+ @Override
+ public ExtendedProperties getExtendedProperties() {
+ return properties.getExtendedProperties();
+ }
+ /**
+ * Returns the custom document properties
+ *
+ * @return the custom properties held by this extractor's POIXMLProperties
+ */
+ @Override
+ public CustomProperties getCustomProperties() {
+ return properties.getCustomProperties();
+ }
+
/**
* Processes the given sheet
*/
public void processSheet(
- SheetTextExtractor sheetExtractor,
+ SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
ReadOnlySharedStringsTable strings,
InputStream sheetInputStream)
throws IOException, SAXException {
+ DataFormatter formatter;
+ if(locale == null) {
+ formatter = new DataFormatter();
+ } else {
+ formatter = new DataFormatter(locale);
+ }
+
InputSource sheetSource = new InputSource(sheetInputStream);
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
try {
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader();
- ContentHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetExtractor, formulasNotResults);
+ ContentHandler handler = new XSSFSheetXMLHandler(
+ styles, strings, sheetContentsExtractor, formatter, formulasNotResults);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch(ParserConfigurationException e) {
}
output.append(formattedValue);
}
+
+ /**
+ * Called for each header or footer; deliberately does nothing, so
+ * header and footer text is not added to the extracted output.
+ */
+ public void headerFooter(String text, boolean isHeader, String tagName) {
+ // We don't include headers in the output yet, so ignore
+ }
}
}
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.POIDataSamples;
assertEquals(4, count);
}
+ /**
+  * Checks that sheet comments can be reached through the sheet
+  * iterator: the first sheet of comments.xlsx must expose a comments
+  * table with one author and three comments, the remaining sheets
+  * must report none, and there must be three sheets in total.
+  */
+ public void testComments() throws Exception {
+     OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("comments.xlsx");
+     XSSFReader r = new XSSFReader(pkg);
+     XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData();
+
+     int count = 0;
+     while(it.hasNext()) {
+         count++;
+         // The sheet data itself isn't needed, only the iterator position
+         InputStream inp = it.next();
+         inp.close();
+
+         // Fetch once per sheet: every call builds a new CommentsTable
+         CommentsTable ct = it.getSheetComments();
+         if(count == 1) {
+             assertNotNull(ct);
+             assertEquals(1, ct.getNumberOfAuthors());
+             assertEquals(3, ct.getNumberOfComments());
+         } else {
+             assertNull(ct);
+         }
+     }
+     assertEquals(3, count);
+ }
/**
* Iterating over a workbook with chart sheets in it, using the