source.dussan.org Git - poi.git/commitdiff
60826 -- add initial support for streaming reading of xlsb files.
author Tim Allison <tallison@apache.org>
Thu, 16 Mar 2017 18:37:13 +0000 (18:37 +0000)
committer Tim Allison <tallison@apache.org>
Thu, 16 Mar 2017 18:37:13 +0000 (18:37 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68

37 files changed:
src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
src/ooxml/java/org/apache/poi/xssf/XLSBUnsupportedException.java
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/binary/package.html [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java [new file with mode: 0644]
test-data/spreadsheet/51519.xlsb [new file with mode: 0644]
test-data/spreadsheet/WithTextBox.xlsb [new file with mode: 0644]
test-data/spreadsheet/comments.xlsb [new file with mode: 0644]
test-data/spreadsheet/date.xlsb [new file with mode: 0644]
test-data/spreadsheet/hyperlink.xlsb [new file with mode: 0644]
test-data/spreadsheet/sample.xlsb [new file with mode: 0644]
test-data/spreadsheet/testVarious.xlsb [new file with mode: 0644]

index 7533a27426962e7ea82b4f835dba1ef5ac39ecd6..faae5bacbdfea740ae956e5012c6d8bc56504260 100644 (file)
@@ -56,6 +56,7 @@ import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xslf.usermodel.XSLFRelation;
 import org.apache.poi.xslf.usermodel.XSLFSlideShow;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
 import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
@@ -244,6 +245,13 @@ public class ExtractorFactory {
                 return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
             }
 
+            // How about xlsb?
+            for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
+                if (rel.getContentType().equals(contentType)) {
+                    return new XSSFBEventBasedExcelExtractor(pkg);
+                }
+            }
+
             throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
 
         } catch (IOException e) {
index 63260276f8e0f738c274816d5475dfc944cad61c..c6ebcff542754d365941fa4a9c8b87e83cbe973f 100644 (file)
@@ -19,7 +19,9 @@ package org.apache.poi.xssf;
 import org.apache.poi.UnsupportedFileFormatException;
 
 /**
- * We don't support .xlsb files, sorry
+ * We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}.
+ * As of POI 3.16-beta3, we do support streaming reading of xlsb files
+ * via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader}
  */
 public class XLSBUnsupportedException extends UnsupportedFileFormatException {
     private static final long serialVersionUID = 7849681804154571175L;
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java
new file mode 100644 (file)
index 0000000..5b427ae
--- /dev/null
@@ -0,0 +1,71 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellReference;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class encapsulates what the spec calls a "Cell" object.
+ * I added "Header" to clarify that this does not contain the contents
+ * of the cell, only the column number, the style id and the phonetic boolean
+ */
+@Internal
+class XSSFBCellHeader {
+    public static final int length = 8; //size constant for a cell header; parse() itself only reads the column uint and the 24-bit style index
+
+    /**
+     * Reads the column number and style index starting at {@code offset} and stores them, with the row, in {@code cell}.
+     * @param data raw data
+     * @param offset offset at which to start reading the record
+     * @param currentRow 0-based current row count
+     * @param cell cell buffer to update
+     */
+    public static void parse(byte[] data, int offset,  int currentRow, XSSFBCellHeader cell) {
+        long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+        int styleIdx = XSSFBUtils.get24BitInt(data, offset); //last field read here, so no further offset bookkeeping
+        //TODO: range checking -- the (int) cast below does not guard against values above Integer.MAX_VALUE
+        boolean showPhonetic = false;//TODO: fill this out
+        cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic);
+    }
+
+    private int rowNum;
+    private int colNum;
+    private int styleIdx;
+    private boolean showPhonetic; //stored by reset() but not read anywhere in this class
+
+    public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) { //overwrites all four fields so one instance can be reused per cell
+        this.rowNum = rowNum;
+        this.colNum = colNum;
+        this.styleIdx = styleIdx;
+        this.showPhonetic = showPhonetic;
+    }
+
+    int getColNum() {
+        return colNum;
+    }
+
+    String formatAddressAsString() {
+        return CellReference.convertNumToColString(colNum)+(rowNum+1); //column string followed by the 1-based row
+    }
+
+    int getStyleIdx() {
+        return styleIdx;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java
new file mode 100644 (file)
index 0000000..3e2e79d
--- /dev/null
@@ -0,0 +1,54 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+class XSSFBCellRange {
+    // Mutable holder for the four bounds of a cell range; instances are filled in (and may be reused) by parse().
+    public final static int length = 4* LittleEndian.INT_SIZE; // parse() consumes four unsigned ints
+    /**
+     * Parses an RfX cell range from the data starting at the offset.
+     * This performs no range checking.
+     * @param data raw bytes
+     * @param offset offset at which to start reading from data
+     * @param cellRange to overwrite. If null, a new cellRange will be created.
+     * @return a mutable cell range.
+     */
+    public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) {
+        if (cellRange == null) {
+            cellRange = new XSSFBCellRange();
+        }
+        cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+        cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+        cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+        cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset));
+
+        return cellRange;
+    }
+
+    int firstRow; // 1st value read by parse()
+    int lastRow; // 2nd value read by parse()
+    int firstCol; // 3rd value read by parse()
+    int lastCol; // 4th value read by parse()
+
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java
new file mode 100644 (file)
index 0000000..ae7c1c5
--- /dev/null
@@ -0,0 +1,112 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import org.apache.poi.ss.usermodel.ClientAnchor;
+import org.apache.poi.ss.usermodel.RichTextString;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+
+@Internal
+class XSSFBComment extends XSSFComment {
+    // Read-only comment: getters return the constructor arguments, every setter throws IllegalArgumentException.
+    private final CellAddress cellAddress;
+    private final String author;
+    private final XSSFBRichTextString comment;
+    private boolean visible = true; // never reassigned in this class, so isVisible() always returns true
+
+    XSSFBComment(CellAddress cellAddress, String author, String comment) {
+        super(null, null, null); // NOTE(review): parent gets no backing objects; inherited methods not overridden here may be unusable -- confirm
+        this.cellAddress = cellAddress;
+        this.author = author;
+        this.comment = new XSSFBRichTextString(comment);
+    }
+
+    @Override
+    public void setVisible(boolean visible) {
+        throw new IllegalArgumentException("XSSFBComment is read only.");
+    }
+
+    @Override
+    public boolean isVisible() {
+        return visible;
+    }
+
+    @Override
+    public CellAddress getAddress() {
+        return cellAddress;
+    }
+
+    @Override
+    public void setAddress(CellAddress addr) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+    }
+
+    @Override
+    public void setAddress(int row, int col) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+
+    }
+
+    @Override
+    public int getRow() {
+        return cellAddress.getRow(); // delegates to the address given at construction
+    }
+
+    @Override
+    public void setRow(int row) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+    }
+
+    @Override
+    public int getColumn() {
+        return cellAddress.getColumn(); // delegates to the address given at construction
+    }
+
+    @Override
+    public void setColumn(int col) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+    }
+
+    @Override
+    public String getAuthor() {
+        return author;
+    }
+
+    @Override
+    public void setAuthor(String author) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+    }
+
+    @Override
+    public XSSFBRichTextString getString() {
+        return comment; // the wrapper built once in the constructor
+    }
+
+    @Override
+    public void setString(RichTextString string) {
+        throw new IllegalArgumentException("XSSFBComment is read only");
+    }
+
+    @Override
+    public ClientAnchor getClientAnchor() {
+        return null; // this class never holds an anchor
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java
new file mode 100644 (file)
index 0000000..642eaf9
--- /dev/null
@@ -0,0 +1,113 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.TreeMap;
+
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+public class XSSFBCommentsTable extends XSSFBParser {
+    //Reads every comment of a comments part eagerly (in the constructor) and indexes them by cell address.
+    private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//keyed by cell address, ordered by row then column
+    private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>();//copy of the map's keys, filled once parsing is done
+    private List<String> authors = new ArrayList<String>();//in BrtCommentAuthor record order; indexed by a comment's author id
+
+    //these are all used only during parsing, and they are mutable!
+    private int authorId = -1;
+    private CellAddress cellAddress = null;
+    private XSSFBCellRange cellRange = null;
+    private String comment = null;
+    private StringBuilder authorBuffer = new StringBuilder();
+
+
+    public XSSFBCommentsTable(InputStream is) throws IOException {
+        super(is);
+        parse();
+        commentAddresses.addAll(comments.keySet());
+    }
+
+    /** Accumulates author strings and begin/text/end comment records into the authors list and the comments map; other record types are ignored. */
+    @Override
+    public void handleRecord(int id, byte[] data) throws XSSFBParseException {
+        XSSFBRecordType recordType = XSSFBRecordType.lookup(id);
+        switch (recordType) {
+            case BrtBeginComment:
+                authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data));
+                cellRange = XSSFBCellRange.parse(data, LittleEndian.INT_SIZE, cellRange);//the range follows the author id
+                //for strict parsing; confirm that firstRow==lastRow and firstCol==lastCol (2.4.28)
+                cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol);
+                break;
+            case BrtCommentText:
+                XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0);
+                comment = xssfbRichStr.getString();
+                break;
+            case BrtEndComment:
+                comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment));//get() throws IndexOutOfBoundsException for an author id outside the authors list
+                authorId = -1;
+                cellAddress = null;
+                comment = null;//reset with the rest of the per-comment state so text cannot leak into a later comment lacking BrtCommentText
+                break;
+            case BrtCommentAuthor:
+                authorBuffer.setLength(0);
+                XSSFBUtils.readXLWideString(data, 0, authorBuffer);
+                authors.add(authorBuffer.toString());
+                break;
+        }
+    }
+
+    /** @return the internal queue of comment addresses (not a copy), initially in row-then-column order. */
+    public Queue<CellAddress> getAddresses() {
+        return commentAddresses;
+    }
+
+    public XSSFBComment get(CellAddress cellAddress) {//null for a null address or when no comment is stored for it
+        if (cellAddress == null) {
+            return null;
+        }
+        return comments.get(cellAddress);
+    }
+
+    private final static class CellAddressComparator implements Comparator<CellAddress> {
+        //orders addresses by row first, then by column
+        @Override
+        public int compare(CellAddress o1, CellAddress o2) {
+            if (o1.getRow() < o2.getRow()) {
+                return -1;
+            } else if (o1.getRow() > o2.getRow()) {
+                return 1;
+            }
+            if (o1.getColumn() < o2.getColumn()) {
+                return -1;
+            } else if (o1.getColumn() > o2.getColumn()) {
+                return 1;
+            }
+            return 0;
+        }
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java
new file mode 100644 (file)
index 0000000..1f43e35
--- /dev/null
@@ -0,0 +1,75 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
+
+@Internal
+class XSSFBHeaderFooter {
+    private final String headerFooterTypeLabel; // label supplied by the creator, returned unchanged by the getter
+    private final boolean isHeader;
+    private String rawString; // stays null until setRawString() is called
+    private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper();
+
+    // One header or footer entry: a type label, a header/footer flag and the raw, unsplit string.
+    XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) {
+        this.headerFooterTypeLabel = headerFooterTypeLabel;
+        this.isHeader = isHeader;
+    }
+
+    String getHeaderFooterTypeLabel() {
+        return headerFooterTypeLabel;
+    }
+
+    String getRawString() {
+        return rawString;
+    }
+
+    String getString() { // left, center and right sections of the raw string; non-empty ones joined by single spaces
+        StringBuilder sb = new StringBuilder();
+        String left = headerFooterHelper.getLeftSection(rawString);
+        String center = headerFooterHelper.getCenterSection(rawString);
+        String right = headerFooterHelper.getRightSection(rawString);
+        if (left != null && left.length() > 0) {
+            sb.append(left);
+        }
+        if (center != null && center.length() > 0) {
+            if (sb.length() > 0) { // separator only when something precedes this section
+                sb.append(" ");
+            }
+            sb.append(center);
+        }
+        if (right != null && right.length() > 0) {
+            if (sb.length() > 0) { // separator only when something precedes this section
+                sb.append(" ");
+            }
+            sb.append(right);
+        }
+        return sb.toString(); // empty string when no section has content
+    }
+
+    void setRawString(String rawString) {
+        this.rawString = rawString;
+    }
+
+    boolean isHeader() {
+        return isHeader;
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java
new file mode 100644 (file)
index 0000000..c70b784
--- /dev/null
@@ -0,0 +1,87 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+class XSSFBHeaderFooters {
+
+    /**
+     * Reads six nullable wide strings in the order header, footer, headerEven, footerEven,
+     * headerFirst, footerFirst; an entry for which too few bytes remain keeps a null raw string.
+     */
+    public static XSSFBHeaderFooters parse(byte[] data) {
+        //skip the first two bytes -- presumably the diffOddEven/diffFirst/scaleWDoc/alignMargins flags; TODO confirm and parse them
+        int offset = 2;
+        XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters();
+        xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true);
+        xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false);
+        xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true);
+        xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false);
+        xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true);
+        xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false);
+        offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header);
+        offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer);
+        offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven);
+        offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven);
+        offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst);
+        readHeaderFooter(data, offset, xssfbHeaderFooter.footerFirst);
+        return xssfbHeaderFooter;
+    }
+
+    private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) {//returns the value reported by the string reader, or 0 if nothing was read
+        if (offset + 4 >= data.length) {//nothing is read unless more than 4 bytes remain
+            return 0;
+        }
+        StringBuilder sb = new StringBuilder();
+        int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb);
+        headerFooter.setRawString(sb.toString());
+        return bytesRead;
+    }
+
+    private XSSFBHeaderFooter header;
+    private XSSFBHeaderFooter footer;
+    private XSSFBHeaderFooter headerEven;
+    private XSSFBHeaderFooter footerEven;
+    private XSSFBHeaderFooter headerFirst;
+    private XSSFBHeaderFooter footerFirst;
+
+    public XSSFBHeaderFooter getHeader() {
+        return header;
+    }
+
+    public XSSFBHeaderFooter getFooter() {
+        return footer;
+    }
+
+    public XSSFBHeaderFooter getHeaderEven() {
+        return headerEven;
+    }
+
+    public XSSFBHeaderFooter getFooterEven() {
+        return footerEven;
+    }
+
+    public XSSFBHeaderFooter getHeaderFirst() {
+        return headerFirst;
+    }
+
+    public XSSFBHeaderFooter getFooterFirst() {
+        return footerFirst;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java
new file mode 100644 (file)
index 0000000..28c020c
--- /dev/null
@@ -0,0 +1,181 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.CellRangeUtil;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+@Internal
+public class XSSFBHyperlinksTable {
+    //Collects the BrtHLink records of one sheet part and resolves their relationship ids against the sheet's .rels.
+    private final static BitSet RECORDS = new BitSet();//handed to the parser; presumably the set of record ids it should deliver -- confirm in XSSFBParser
+
+
+    static {
+        RECORDS.set(XSSFBRecordType.BrtHLink.getId());
+    }
+
+
+    private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>();
+
+    //cache the relId to hyperlink url from the sheet's .rels
+    private Map<String, String> relIdToHyperlink = new HashMap<String, String>();
+
+    public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException {
+        //load the urls from the sheet .rels
+        loadUrlsFromSheetRels(sheetPart);
+        //now load the hyperlinks from the bottom of the sheet; the part's stream is closed even if parsing fails
+        InputStream is = sheetPart.getInputStream();
+        try {
+            new HyperlinkSheetScraper(is).parse();
+        } finally {
+            is.close();
+        }
+    }
+
+    /**
+     * Groups the parsed hyperlink records by the top left cell of their range, ordered by row then column.
+     * @return a map of the hyperlinks. The key is the top left cell address in their CellRange
+     */
+    public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() {
+        Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap =
+                new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator());
+        for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) {
+            CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(),
+                    hyperlinkRecord.getCellRangeAddress().getFirstColumn());
+            List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress);
+            if (list == null) {
+                list = new ArrayList<XSSFHyperlinkRecord>();
+                hyperlinkMap.put(cellAddress, list);//register the bucket once, when it is created
+            }
+            list.add(hyperlinkRecord);
+        }
+        return hyperlinkMap;
+    }
+
+
+    /**
+     * Finds every hyperlink record whose cell range intersects the given cell.
+     * @param cellAddress cell address to find
+     * @return null if not a hyperlink
+     */
+    public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) {
+        List<XSSFHyperlinkRecord> overlapping = null;
+        CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(),
+                cellAddress.getRow(),
+                cellAddress.getColumn(),
+                cellAddress.getColumn());
+        for (XSSFHyperlinkRecord record : hyperlinkRecords) {
+            if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) {
+                if (overlapping == null) {
+                    overlapping = new ArrayList<XSSFHyperlinkRecord>();
+                }
+                overlapping.add(record);
+            }
+        }
+        return overlapping;
+    }
+
+    private void loadUrlsFromSheetRels(PackagePart sheetPart) {
+        try {
+            for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
+                relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString());
+            }
+        } catch (InvalidFormatException e) {
+            //swallow: best effort -- relationships not loaded here simply leave their relIds without a url
+        }
+    }
+
+    private class HyperlinkSheetScraper extends XSSFBParser {
+
+        private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange();
+        private final StringBuilder xlWideStringBuffer = new StringBuilder();
+
+        HyperlinkSheetScraper(InputStream is) {
+            super(is, RECORDS);
+        }
+
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            if (recordType != XSSFBRecordType.BrtHLink.getId()) {
+                return;
+            }
+            int offset = 0;
+
+            hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange);
+            offset += XSSFBCellRange.length;
+            xlWideStringBuffer.setLength(0);
+            offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer);
+            String relId = xlWideStringBuffer.toString();
+            xlWideStringBuffer.setLength(0);
+            offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
+            String location = xlWideStringBuffer.toString();
+            xlWideStringBuffer.setLength(0);
+            offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
+            String toolTip = xlWideStringBuffer.toString();
+            xlWideStringBuffer.setLength(0);
+            XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);//last field, nothing is read after it
+            String display = xlWideStringBuffer.toString();
+            CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol);
+
+            String url = relIdToHyperlink.get(relId);
+            if (location.length() == 0) {//no in-record location: fall back to the url from the .rels (may be null)
+                location = url;
+            }
+
+            hyperlinkRecords.add(
+                    new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display)
+            );
+        }
+    }
+
+    private static class TopLeftCellAddressComparator implements Comparator<CellAddress> {
+        //orders addresses by row first, then by column
+        @Override
+        public int compare(CellAddress o1, CellAddress o2) {
+            if (o1.getRow() < o2.getRow()) {
+                return -1;
+            } else if (o1.getRow() > o2.getRow()) {
+                return 1;
+            }
+            if (o1.getColumn() < o2.getColumn()) {
+                return -1;
+            } else if (o1.getColumn() > o2.getColumn()) {
+                return 1;
+            }
+            return 0;
+        }
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java
new file mode 100644 (file)
index 0000000..69ba7f0
--- /dev/null
@@ -0,0 +1,28 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+/**
+ * Parse exception while reading an xssfb
+ */
+public class XSSFBParseException extends RuntimeException {
+
+    public XSSFBParseException(String msg) {
+        super(msg);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java
new file mode 100644 (file)
index 0000000..cace843
--- /dev/null
@@ -0,0 +1,105 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.BitSet;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianInputStream;
+
+/**
+ * Experimental parser for Microsoft's ooxml xssfb format.
+ * Not thread safe, obviously.  Need to create a new one
+ * for each thread.
+ */
+@Internal
+public abstract class XSSFBParser {
+
+    private final LittleEndianInputStream is;
+    private final BitSet records;
+
+    public XSSFBParser(InputStream is) {
+        this.is = new LittleEndianInputStream(is);
+        records = null;
+    }
+
+    XSSFBParser(InputStream is, BitSet bitSet) {
+        this.is = new LittleEndianInputStream(is);
+        records = bitSet;
+    }
+
+    public void parse() throws IOException {
+
+        while (true) {
+            int bInt = is.read();
+            if (bInt == -1) {
+                return;
+            }
+            readNext((byte) bInt);
+        }
+    }
+
+    private void readNext(byte b1) throws IOException {
+        int recordId = 0;
+
+        //if highest bit == 1
+        if ((b1 >> 7 & 1) == 1) {
+            byte b2 = is.readByte();
+            b1 &= ~(1<<7); //unset highest bit
+            b2 &= ~(1<<7); //unset highest bit (if it exists?)
+            recordId = (128*(int)b2)+(int)b1;
+        } else {
+            recordId = (int)b1;
+        }
+
+        long recordLength = 0;
+        int i = 0;
+        boolean halt = false;
+        while (i < 4 && ! halt) {
+            byte b = is.readByte();
+            halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue
+            b &= ~(1<<7);
+            recordLength += (int)b << (i*7); //multiply by 128^i
+            i++;
+
+        }
+        if (records == null || records.get(recordId)) {
+            //add sanity check for length?
+            byte[] buff = new byte[(int) recordLength];
+            is.readFully(buff);
+            handleRecord(recordId, buff);
+        } else {
+            long length = is.skip(recordLength);
+            if (length != recordLength) {
+                throw new XSSFBParseException("End of file reached before expected.\t"+
+                "Tried to skip "+recordLength + ", but only skipped "+length);
+            }
+        }
+    }
+
+    //It hurts, hurts, hurts to create a new byte array for every record.
+    //However, on a large Excel spreadsheet, this parser was 1/3 faster than
+    //the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf.
+    //The code is far cleaner to have the parser read all
+    //of the data rather than having every component promise that it read
+    //the correct amount.
+    abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException;
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java
new file mode 100644 (file)
index 0000000..65663f7
--- /dev/null
@@ -0,0 +1,92 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public enum XSSFBRecordType {
+
+    BrtCellBlank(1),
+    BrtCellRk(2),
+    BrtCellError(3),
+    BrtCellBool(4),
+    BrtCellReal(5),
+    BrtCellSt(6),
+    BrtCellIsst(7),
+    BrtFmlaString(8),
+    BrtFmlaNum(9),
+    BrtFmlaBool(10),
+    BrtFmlaError(11),
+    BrtRowHdr(0),
+    BrtCellRString(62),
+    BrtBeginSheet(129),
+    BrtWsProp(147),
+    BrtWsDim(148),
+    BrtColInfo(60),
+    BrtBeginSheetData(145),
+    BrtEndSheetData(146),
+    BrtHLink(494),
+    BrtBeginHeaderFooter(479),
+
+    //comments
+    BrtBeginCommentAuthors(630),
+    BrtEndCommentAuthors(631),
+    BrtCommentAuthor(632),
+    BrtBeginComment(635),
+    BrtCommentText(637),
+    BrtEndComment(636),
+    //styles table
+    BrtXf(47),
+    BrtFmt(44),
+    BrtBeginFmts(615),
+    BrtEndFmts(616),
+    BrtBeginCellXFs(617),
+    BrtEndCellXFs(618),
+    BrtBeginCellStyleXFS(626),
+    BrtEndCellStyleXFS(627),
+
+    //stored strings table
+    BrtSstItem(19),   //stored strings items
+    BrtBeginSst(159), //stored strings begin sst
+    BrtEndSst(160),   //stored strings end sst
+
+    BrtBundleSh(156), //defines worksheet in wb part
+    Unimplemented(-1);
+
+
+    private final int id;
+
+    XSSFBRecordType(int id) {
+        this.id = id;
+    }
+
+    public int getId() {
+        return id;
+    }
+
+    public static XSSFBRecordType lookup(int id) {
+        for (XSSFBRecordType r : XSSFBRecordType.values()) {
+            if (r.id == id) {
+                return r;
+            }
+        }
+        return Unimplemented;
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java
new file mode 100644 (file)
index 0000000..3f0b028
--- /dev/null
@@ -0,0 +1,85 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.poi.POIXMLDocumentPart;
+import org.apache.poi.POIXMLRelation;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation}
+ * because of conflicts with regular ooxml relations.
+ * If we failed to break this into a separate class, in the cases of SharedStrings and Styles,
+ * 2 parts would exist, and &quot;Packages shall not contain equivalent part names...&quot;
+ * <p>
+ * Also, we need to avoid the possibility of breaking the marshalling process for xml.
+ */
+@Internal
+public class XSSFBRelation extends POIXMLRelation {
+    private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class);
+
+    static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation(
+            "application/vnd.ms-excel.sharedStrings",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings",
+            "/xl/sharedStrings.bin",
+            null
+    );
+
+    public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation(
+            "application/vnd.ms-excel.styles",
+            PackageRelationshipTypes.STYLE_PART,
+            "/xl/styles.bin",
+            null
+    );
+
+    private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
+        super(type, rel, defaultName, cls);
+    }
+
+    /**
+     * Fetches the InputStream to read the contents, based
+     * of the specified core part, for which we are defined
+     * as a suitable relationship
+     */
+    public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
+        PackageRelationshipCollection prc =
+                corePart.getRelationshipsByType(getRelation());
+        Iterator<PackageRelationship> it = prc.iterator();
+        if (it.hasNext()) {
+            PackageRelationship rel = it.next();
+            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+            PackagePart part = corePart.getPackage().getPart(relName);
+            return part.getInputStream();
+        }
+        log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
+        return null;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java
new file mode 100644 (file)
index 0000000..e9ba59a
--- /dev/null
@@ -0,0 +1,47 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+class XSSFBRichStr {
+
+    public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException {
+        byte first = bytes[offset];
+        boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1?
+        boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1?
+        StringBuilder sb = new StringBuilder();
+
+        int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb);
+        //TODO: parse phonetic strings.
+        return new XSSFBRichStr(sb.toString(), "");
+    }
+
+    private final String string;
+    private final String phoneticString;
+
+    XSSFBRichStr(String string, String phoneticString) {
+        this.string = string;
+        this.phoneticString = phoneticString;
+    }
+
+    public String getString() {
+        return string;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java
new file mode 100644 (file)
index 0000000..1fb5b54
--- /dev/null
@@ -0,0 +1,80 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.usermodel.Font;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+
+/**
+ * Wrapper class around String so that we can use it in Comment.
+ * Nothing has been implemented yet except for {@link #getString()}.
+ */
+@Internal
+class XSSFBRichTextString extends XSSFRichTextString {
+    private final String string;
+
+    XSSFBRichTextString(String string) {
+        this.string = string;
+    }
+
+    @Override
+    public void applyFont(int startIndex, int endIndex, short fontIndex) {
+
+    }
+
+    @Override
+    public void applyFont(int startIndex, int endIndex, Font font) {
+
+    }
+
+    @Override
+    public void applyFont(Font font) {
+
+    }
+
+    @Override
+    public void clearFormatting() {
+
+    }
+
+    @Override
+    public String getString() {
+        return string;
+    }
+
+    @Override
+    public int length() {
+        return string.length();
+    }
+
+    @Override
+    public int numFormattingRuns() {
+        return 0;
+    }
+
+    @Override
+    public int getIndexOfFormattingRun(int index) {
+        return 0;
+    }
+
+    @Override
+    public void applyFont(short fontIndex) {
+
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java
new file mode 100644 (file)
index 0000000..49d1a46
--- /dev/null
@@ -0,0 +1,137 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.binary;
+
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.xml.sax.SAXException;
+
+@Internal
+public class XSSFBSharedStringsTable {
+
+    /**
+     * An integer representing the total count of strings in the workbook. This count does not
+     * include any numbers, it counts only the total of text strings in the workbook.
+     */
+    private int count;
+
+    /**
+     * An integer representing the total count of unique strings in the Shared String Table.
+     * A string is unique even if it is a copy of another string, but has different formatting applied
+     * at the character level.
+     */
+    private int uniqueCount;
+
+    /**
+     * The shared strings table.
+     */
+    private List<String> strings = new ArrayList<String>();
+
+    /**
+     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
+     * @throws IOException If reading the data from the package fails.
+     * @throws SAXException if parsing the XML data fails.
+     */
+    public XSSFBSharedStringsTable(OPCPackage pkg)
+            throws IOException, SAXException {
+        ArrayList<PackagePart> parts =
+                pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType());
+
+        // Some workbooks have no shared strings table.
+        if (parts.size() > 0) {
+            PackagePart sstPart = parts.get(0);
+
+            readFrom(sstPart.getInputStream());
+        }
+    }
+
+    /**
+     * Like POIXMLDocumentPart constructor
+     *
+     * @since POI 3.14-Beta3
+     */
+    XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException {
+        readFrom(part.getInputStream());
+    }
+
+    private void readFrom(InputStream inputStream) throws IOException {
+        SSTBinaryReader reader = new SSTBinaryReader(inputStream);
+        reader.parse();
+    }
+
+    public List<String> getItems() {
+        return strings;
+    }
+
+    public String getEntryAt(int i) {
+        return strings.get(i);
+    }
+
+    /**
+     * Return an integer representing the total count of strings in the workbook. This count does not
+     * include any numbers, it counts only the total of text strings in the workbook.
+     *
+     * @return the total count of strings in the workbook
+     */
+    public int getCount() {
+        return this.count;
+    }
+
+    /**
+     * Returns an integer representing the total count of unique strings in the Shared String Table.
+     * A string is unique even if it is a copy of another string, but has different formatting applied
+     * at the character level.
+     *
+     * @return the total count of unique strings in the workbook
+     */
+    public int getUniqueCount() {
+        return this.uniqueCount;
+    }
+
+    private class SSTBinaryReader extends XSSFBParser {
+
+        SSTBinaryReader(InputStream is) {
+            super(is);
+        }
+
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
+
+            switch (type) {
+                case BrtSstItem:
+                    XSSFBRichStr rstr = XSSFBRichStr.build(data, 0);
+                    strings.add(rstr.getString());
+                    break;
+                case BrtBeginSst:
+                    count = (int) LittleEndian.getUInt(data,0);
+                    uniqueCount = (int) LittleEndian.getUInt(data, 4);
+                    break;
+            }
+
+        }
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java
new file mode 100644 (file)
index 0000000..ca5dab5
--- /dev/null
@@ -0,0 +1,329 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import java.io.InputStream;
+import java.util.Queue;
+
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+
+@Internal
+public class XSSFBSheetHandler extends XSSFBParser {
+
+    private final static int CHECK_ALL_ROWS = -1;
+
+    private final XSSFBSharedStringsTable stringsTable;
+    private final XSSFSheetXMLHandler.SheetContentsHandler handler;
+    private final XSSFBStylesTable styles;
+    private final XSSFBCommentsTable comments;
+    private final DataFormatter dataFormatter;
+    private final boolean formulasNotResults;//TODO: implement this
+
+    private int lastEndedRow = -1;
+    private int lastStartedRow = -1;
+    private int currentRow = 0;
+    private byte[] rkBuffer = new byte[8];
+    private XSSFBCellRange hyperlinkCellRange = null;
+    private StringBuilder xlWideStringBuffer = new StringBuilder();
+
+    private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader();
+    public XSSFBSheetHandler(InputStream is,
+                             XSSFBStylesTable styles,
+                             XSSFBCommentsTable comments,
+                             XSSFBSharedStringsTable strings,
+                             XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
+                             DataFormatter dataFormatter,
+                             boolean formulasNotResults) {
+        super(is);
+        this.styles = styles;
+        this.comments = comments;
+        this.stringsTable = strings;
+        this.handler = sheetContentsHandler;
+        this.dataFormatter = dataFormatter;
+        this.formulasNotResults = formulasNotResults;
+    }
+
+    @Override
+    public void handleRecord(int id, byte[] data) throws XSSFBParseException {
+        XSSFBRecordType type = XSSFBRecordType.lookup(id);
+
+        switch(type) {
+            case BrtRowHdr:
+                long rw = LittleEndian.getUInt(data, 0);
+                if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec?
+                    throw new XSSFBParseException("Row number beyond allowable range: "+rw);
+                }
+                currentRow = (int)rw;
+                checkMissedComments(currentRow);
+                startRow(currentRow);
+                break;
+            case BrtCellIsst:
+                handleBrtCellIsst(data);
+                break;
+            case BrtCellSt: //TODO: needs test
+                handleCellSt(data);
+                break;
+            case BrtCellRk:
+                handleCellRk(data);
+                break;
+            case BrtCellReal:
+                handleCellReal(data);
+                break;
+            case BrtCellBool:
+                handleBoolean(data);
+                break;
+            case BrtCellError:
+                handleCellError(data);
+                break;
+            case BrtCellBlank:
+                beforeCellValue(data);//read cell info and check for missing comments
+                break;
+            case BrtFmlaString:
+                handleFmlaString(data);
+                break;
+            case BrtFmlaNum:
+                handleFmlaNum(data);
+                break;
+            case BrtFmlaError:
+                handleFmlaError(data);
+                break;
+                //TODO: All the PCDI and PCDIA
+            case BrtEndSheetData:
+                checkMissedComments(CHECK_ALL_ROWS);
+                endRow(lastStartedRow);
+                break;
+            case BrtBeginHeaderFooter:
+                handleHeaderFooter(data);
+                break;
+        }
+    }
+
+
+    private void beforeCellValue(byte[] data) {
+        XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer);
+        checkMissedComments(currentRow, cellBuffer.getColNum());
+    }
+
+    private void handleCellValue(String formattedValue) {
+        CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum());
+        XSSFBComment comment = null;
+        if (comments != null) {
+            comment = comments.get(cellAddress);
+        }
+        handler.cell(cellAddress.formatAsString(), formattedValue, comment);
+    }
+
+    private void handleFmlaNum(byte[] data) {
+        beforeCellValue(data);
+        //xNum
+        double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
+        String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+        String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+        handleCellValue(formattedVal);
+    }
+
+    private void handleCellSt(byte[] data) {
+        beforeCellValue(data);
+        xlWideStringBuffer.setLength(0);
+        XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
+        handleCellValue(xlWideStringBuffer.toString());
+    }
+
+    private void handleFmlaString(byte[] data) {
+        beforeCellValue(data);
+        xlWideStringBuffer.setLength(0);
+        XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
+        handleCellValue(xlWideStringBuffer.toString());
+    }
+
+    private void handleCellError(byte[] data) {
+        beforeCellValue(data);
+        //TODO, read byte to figure out the type of error
+        handleCellValue("ERROR");
+    }
+
+    private void handleFmlaError(byte[] data) {
+        beforeCellValue(data);
+        //TODO, read byte to figure out the type of error
+        handleCellValue("ERROR");
+    }
+
+    private void handleBoolean(byte[] data) {
+        beforeCellValue(data);
+        String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE";
+        handleCellValue(formattedVal);
+    }
+
+    private void handleCellReal(byte[] data) {
+        beforeCellValue(data);
+        //xNum
+        double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
+        String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+        String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+        handleCellValue(formattedVal);
+    }
+
+    private void handleCellRk(byte[] data) {
+        beforeCellValue(data);
+        double val = rkNumber(data, XSSFBCellHeader.length);
+        String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+        String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+        handleCellValue(formattedVal);
+    }
+
+    private void handleBrtCellIsst(byte[] data) {
+        beforeCellValue(data);
+        long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length);
+        //check for out of range, buffer overflow
+
+        XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx));
+        handleCellValue(rtss.getString());
+    }
+
+
+    private void handleHeaderFooter(byte[] data) {
+        XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data);
+        outputHeaderFooter(headerFooter.getHeader());
+        outputHeaderFooter(headerFooter.getFooter());
+        outputHeaderFooter(headerFooter.getHeaderEven());
+        outputHeaderFooter(headerFooter.getFooterEven());
+        outputHeaderFooter(headerFooter.getHeaderFirst());
+        outputHeaderFooter(headerFooter.getFooterFirst());
+    }
+
+    private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) {
+        String text = headerFooter.getString();
+        if (text != null && text.trim().length() > 0) {
+            handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel());
+        }
+    }
+
+
+    //at start of next cell or end of row, return the cellAddress if it equals currentRow and col
+    private void checkMissedComments(int currentRow, int colNum) {
+        if (comments == null) {
+            return;
+        }
+        Queue<CellAddress> queue = comments.getAddresses();
+        while (queue.size() > 0) {
+            CellAddress cellAddress = queue.peek();
+            if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) {
+                cellAddress = queue.remove();
+                dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
+            } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) {
+                queue.remove();
+                return;
+            } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) {
+                return;
+            } else if (cellAddress.getRow() > currentRow) {
+                return;
+            }
+        }
+    }
+
+    //check for anything from rows before
+    private void checkMissedComments(int currentRow) {
+        if (comments == null) {
+            return;
+        }
+        Queue<CellAddress> queue = comments.getAddresses();
+        int lastInterpolatedRow = -1;
+        while (queue.size() > 0) {
+            CellAddress cellAddress = queue.peek();
+            if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) {
+                cellAddress = queue.remove();
+                if (cellAddress.getRow() != lastInterpolatedRow) {
+                    startRow(cellAddress.getRow());
+                }
+                dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
+                lastInterpolatedRow = cellAddress.getRow();
+            } else {
+                break;
+            }
+        }
+
+    }
+
+    private void startRow(int row) {
+        if (row == lastStartedRow) {
+            return;
+        }
+
+        if (lastStartedRow != lastEndedRow) {
+            endRow(lastStartedRow);
+        }
+        handler.startRow(row);
+        lastStartedRow = row;
+    }
+
+    private void endRow(int row) {
+        if (lastEndedRow == row) {
+            return;
+        }
+        handler.endRow(row);
+        lastEndedRow = row;
+    }
+
+    private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) {
+        handler.cell(cellAddress.formatAsString(), null, comment);
+    }
+
+    private double rkNumber(byte[] data, int offset) {
+        //see 2.5.122 for this abomination
+        byte b0 = data[offset];
+        String s = Integer.toString(b0, 2);
+        boolean numDivBy100 = ((b0 & 1) == 1); // else as is
+        boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer
+
+        //unset highest 2 bits
+        b0 &= ~1;
+        b0 &= ~(1<<1);
+
+        rkBuffer[4] = b0;
+        for (int i = 1; i < 4; i++) {
+            rkBuffer[i+4] = data[offset+i];
+        }
+        double d = 0.0;
+        if (floatingPoint) {
+            d = LittleEndian.getDouble(rkBuffer);
+        } else {
+            d = LittleEndian.getInt(rkBuffer);
+        }
+        d = (numDivBy100) ? d/100 : d;
+        return d;
+    }
+
+    /**
+     * You need to implement this to handle the results
+     *  of the sheet parsing.
+     */
+    public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler {
+        /**
+         * A cell, with the given formatted value (may be null),
+         * a url (may be null), a toolTip (may be null)
+         *  and possibly a comment (may be null), was encountered */
+        void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java
new file mode 100644 (file)
index 0000000..8584e95
--- /dev/null
@@ -0,0 +1,101 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.poi.POIXMLException;
+import org.apache.poi.ss.usermodel.BuiltinFormats;
+import org.apache.poi.util.Internal;
+
+@Internal
+public class XSSFBStylesTable extends XSSFBParser {
+
+    private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>();
+    private final List<Short> styleIds = new ArrayList<Short>();
+
+    private boolean inCellXFS = false;
+    private boolean inFmts = false;
+    public XSSFBStylesTable(InputStream is) throws IOException {
+        super(is);
+        parse();
+    }
+
+    String getNumberFormatString(int idx) {
+        if (numberFormats.containsKey(styleIds.get((short)idx))) {
+            return numberFormats.get(styleIds.get((short)idx));
+        }
+
+        return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx));
+    }
+
+    @Override
+    public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+        XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
+        switch (type) {
+            case BrtBeginCellXFs:
+                inCellXFS = true;
+                break;
+            case BrtEndCellXFs:
+                inCellXFS = false;
+                break;
+            case BrtXf:
+                if (inCellXFS) {
+                    handleBrtXFInCellXF(data);
+                }
+                break;
+            case BrtBeginFmts:
+                inFmts = true;
+                break;
+            case BrtEndFmts:
+                inFmts = false;
+                break;
+            case BrtFmt:
+                if (inFmts) {
+                    handleFormat(data);
+                }
+                break;
+
+        }
+    }
+
+    private void handleFormat(byte[] data) {
+        int ifmt = data[0] & 0xFF;
+        if (ifmt > Short.MAX_VALUE) {
+            throw new POIXMLException("Format id must be a short");
+        }
+        StringBuilder sb = new StringBuilder();
+        XSSFBUtils.readXLWideString(data, 2, sb);
+        String fmt = sb.toString();
+        numberFormats.put((short)ifmt, fmt);
+    }
+
+    private void handleBrtXFInCellXF(byte[] data) {
+        int ifmtOffset = 2;
+        //int ifmtLength = 2;
+
+        //numFmtId in xml terms
+        int ifmt = data[ifmtOffset] & 0xFF;//the second byte is ignored
+        styleIds.add((short)ifmt);
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java
new file mode 100644 (file)
index 0000000..e3a46b0
--- /dev/null
@@ -0,0 +1,108 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import java.nio.charset.Charset;
+
+import org.apache.poi.POIXMLException;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+public class XSSFBUtils {
+
+    /**
+     * Reads an XLNullableWideString.
+     * @param data data from which to read
+     * @param offset in data from which to start
+     * @param sb buffer to which to write.  You must setLength(0) before calling!
+     * @return number of bytes read
+     * @throws XSSFBParseException if there was an exception during reading
+     */
+    static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
+        long numChars = LittleEndian.getUInt(data, offset);
+        if (numChars < 0) {
+            throw new XSSFBParseException("too few chars to read");
+        } else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!!
+            return 0;
+        } else if (numChars > 0xFFFFFFFFL) {
+            throw new XSSFBParseException("too many chars to read");
+        }
+
+        int numBytes = 2*(int)numChars;
+        offset += 4;
+        if (offset+numBytes > data.length) {
+            throw new XSSFBParseException("trying to read beyond data length:" +
+             "offset="+offset+", numBytes="+numBytes+", data.length="+data.length);
+        }
+        sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
+        numBytes+=4;
+        return numBytes;
+    }
+
+
+    /**
+     * Reads an XLNullableWideString.
+     * @param data data from which to read
+     * @param offset in data from which to start
+     * @param sb buffer to which to write.  You must setLength(0) before calling!
+     * @return number of bytes read
+     * @throws XSSFBParseException if there was an exception while trying to read the string
+     */
+    public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
+        long numChars = LittleEndian.getUInt(data, offset);
+        if (numChars < 0) {
+            throw new XSSFBParseException("too few chars to read");
+        } else if (numChars > 0xFFFFFFFFL) {
+            throw new XSSFBParseException("too many chars to read");
+        }
+        int numBytes = 2*(int)numChars;
+        offset += 4;
+        if (offset+numBytes > data.length) {
+            throw new XSSFBParseException("trying to read beyond data length");
+        }
+        sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
+        numBytes+=4;
+        return numBytes;
+    }
+
+    static int castToInt(long val) {
+        if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) {
+            return (int)val;
+        }
+        throw new POIXMLException("val ("+val+") can't be cast to int");
+    }
+
+    static short castToShort(int val) {
+        if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) {
+            return (short)val;
+        }
+        throw new POIXMLException("val ("+val+") can't be cast to short");
+
+    }
+
+    //TODO: move to LittleEndian?
+    static int get24BitInt( byte[] data, int offset) {
+        int i = offset;
+        int b0 = data[i++] & 0xFF;
+        int b1 = data[i++] & 0xFF;
+        int b2 = data[i] & 0xFF;
+        return ( b2 << 16 ) + ( b1 << 8 ) + b0;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java
new file mode 100644 (file)
index 0000000..a02e8ce
--- /dev/null
@@ -0,0 +1,117 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.util.Internal;
+
+/**
+ * This is a read only record that maintains information about
+ * a hyperlink.  In OOXML land, this information has to be merged
+ * from 1) the sheet's .rels to get the url and 2) from after the
+ * sheet data in they hyperlink section.
+ *
+ * The {@link #display} is often empty and should be filled from
+ * the contents of the anchor cell.
+ *
+ */
+@Internal
+public class XSSFHyperlinkRecord {
+
+    private final CellRangeAddress cellRangeAddress;
+    private final String relId;
+    private String location;
+    private String toolTip;
+    private String display;
+
+    XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
+        this.cellRangeAddress = cellRangeAddress;
+        this.relId = relId;
+        this.location = location;
+        this.toolTip = toolTip;
+        this.display = display;
+    }
+
+    void setLocation(String location) {
+        this.location = location;
+    }
+
+    void setToolTip(String toolTip) {
+        this.toolTip = toolTip;
+    }
+
+    void setDisplay(String display) {
+        this.display = display;
+    }
+
+    CellRangeAddress getCellRangeAddress() {
+        return cellRangeAddress;
+    }
+
+    public String getRelId() {
+        return relId;
+    }
+
+    public String getLocation() {
+        return location;
+    }
+
+    public String getToolTip() {
+        return toolTip;
+    }
+
+    public String getDisplay() {
+        return display;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        XSSFHyperlinkRecord that = (XSSFHyperlinkRecord) o;
+
+        if (cellRangeAddress != null ? !cellRangeAddress.equals(that.cellRangeAddress) : that.cellRangeAddress != null)
+            return false;
+        if (relId != null ? !relId.equals(that.relId) : that.relId != null) return false;
+        if (location != null ? !location.equals(that.location) : that.location != null) return false;
+        if (toolTip != null ? !toolTip.equals(that.toolTip) : that.toolTip != null) return false;
+        return display != null ? display.equals(that.display) : that.display == null;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = cellRangeAddress != null ? cellRangeAddress.hashCode() : 0;
+        result = 31 * result + (relId != null ? relId.hashCode() : 0);
+        result = 31 * result + (location != null ? location.hashCode() : 0);
+        result = 31 * result + (toolTip != null ? toolTip.hashCode() : 0);
+        result = 31 * result + (display != null ? display.hashCode() : 0);
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "XSSFHyperlinkRecord{" +
+                "cellRangeAddress=" + cellRangeAddress +
+                ", relId='" + relId + '\'' +
+                ", location='" + location + '\'' +
+                ", toolTip='" + toolTip + '\'' +
+                ", display='" + display + '\'' +
+                '}';
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/package.html b/src/ooxml/java/org/apache/poi/xssf/binary/package.html
new file mode 100644 (file)
index 0000000..c7e4a01
--- /dev/null
@@ -0,0 +1,44 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+   ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   ====================================================================
+-->
+<html>
+<head>
+</head>
+<body bgcolor="white">
+
+<p>The org.apache.poi.xssf.binary package includes necessary underlying components
+for streaming/read-only processing of xlsb files.
+</p>
+<p>
+    POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader
+    in o.a.p.xssf.eventusermodel.
+</p>
+<p>
+    This feature was added in poi-3.15-beta3 and should be considered experimental.  Most classes
+    have been marked @Internal and the API is subject to change.
+</p>
+<h2>Related Documentation</h2>
+
+For overviews, tutorials, examples, guides, and tool documentation, please see:
+<ul>
+<li><a href="http://poi.apache.org">Apache POI Project</a>
+</ul>
+
+</body>
+</html>
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java
new file mode 100644 (file)
index 0000000..b8f54cd
--- /dev/null
@@ -0,0 +1,172 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBParseException;
+import org.apache.poi.xssf.binary.XSSFBParser;
+import org.apache.poi.xssf.binary.XSSFBRecordType;
+import org.apache.poi.xssf.binary.XSSFBRelation;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.binary.XSSFBUtils;
+import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+/**
+ * Reader for xlsb files.
+ */
+public class XSSFBReader extends XSSFReader {
+    /**
+     * Creates a new XSSFReader, for the given package
+     *
+     * @param pkg opc package
+     */
+    public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
+        super(pkg);
+    }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     *  different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     *  from the Iterator. It's up to you to close the
+     *  InputStreams when done with each one.
+     */
+    @Override
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        return new SheetIterator(workbookPart);
+    }
+
+    public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
+        ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
+        if(parts.size() == 0) return null;
+
+        // Create the Styles Table, and associate the Themes if present
+        return new XSSFBStylesTable(parts.get(0).getInputStream());
+
+    }
+
+
+    public static class SheetIterator extends XSSFReader.SheetIterator {
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding workbook.xml
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+            super(wb);
+        }
+
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+            SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream());
+            sheetRefLoader.parse();
+            return sheetRefLoader.getSheets().iterator();
+        }
+
+        /**
+         * Not supported by XSSFBReader's SheetIterator.
+         * Please use {@link #getXSSFBSheetComments()} instead.
+         * @return nothing, always throws IllegalArgumentException!
+         */
+        @Override
+        public CommentsTable getSheetComments() {
+            throw new IllegalArgumentException("Please use getXSSFBSheetComments");
+        }
+
+        public XSSFBCommentsTable getXSSFBSheetComments() {
+            PackagePart sheetPkg = getSheetPart();
+
+            // Do we have a comments relationship? (Only ever one if so)
+            try {
+                PackageRelationshipCollection commentsList =
+                        sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+                if (commentsList.size() > 0) {
+                    PackageRelationship comments = commentsList.getRelationship(0);
+                    if (comments == null || comments.getTargetURI() == null) {
+                        return null;
+                    }
+                    PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
+                    PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
+                    return new XSSFBCommentsTable(commentsPart.getInputStream());
+                }
+            } catch (InvalidFormatException e) {
+                return null;
+            } catch (IOException e) {
+                return null;
+            }
+            return null;
+        }
+
+    }
+
+    private static class SheetRefLoader extends XSSFBParser {
+        List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>();
+
+        private SheetRefLoader(InputStream is) {
+            super(is);
+        }
+
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
+                addWorksheet(data);
+            }
+        }
+
+        private void addWorksheet(byte[] data) {
+            int offset = 0;
+            //this is the sheet state #2.5.142
+            long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+
+            long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+            //according to #2.4.304
+            if (iTabID < 1 || iTabID > 0x0000FFFFL) {
+                throw new XSSFBParseException("table id out of range: "+iTabID);
+            }
+            StringBuilder sb = new StringBuilder();
+            offset += XSSFBUtils.readXLWideString(data, offset, sb);
+            String relId = sb.toString();
+            sb.setLength(0);
+            XSSFBUtils.readXLWideString(data, offset, sb);
+            String name = sb.toString();
+            if (relId != null && relId.trim().length() > 0) {
+                sheets.add(new XSSFSheetRef(relId, name));
+            }
+        }
+
+        List<XSSFSheetRef> getSheets() {
+            return sheets;
+        }
+    }
+}
\ No newline at end of file
index e5c9cb25b10333e922d7fa51b3c2c7af0e7be1e4..5b43c2010101eed7b79b53729f6dea1326cb4352 100644 (file)
 ==================================================================== */
 package org.apache.poi.xssf.eventusermodel;
 
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
+import javax.xml.parsers.ParserConfigurationException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.poi.POIXMLException;
@@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
+import org.apache.poi.util.SAXHelper;
 import org.apache.poi.xssf.model.CommentsTable;
 import org.apache.poi.xssf.model.SharedStringsTable;
 import org.apache.poi.xssf.model.StylesTable;
@@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSSFDrawing;
 import org.apache.poi.xssf.usermodel.XSSFRelation;
 import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
 
 /**
  * This class makes it easy to get at individual parts
@@ -62,8 +66,8 @@ public class XSSFReader {
 
     private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
 
-    private OPCPackage pkg;
-    private PackagePart workbookPart;
+    protected OPCPackage pkg;
+    protected PackagePart workbookPart;
 
     /**
      * Creates a new XSSFReader, for the given package
@@ -194,23 +198,23 @@ public class XSSFReader {
         private final Map<String, PackagePart> sheetMap;
 
         /**
-         * Current CTSheet bean
+         * Current sheet reference
          */
-        private CTSheet ctSheet;
-        
+        XSSFSheetRef xssfSheetRef;
+
         /**
          * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
          * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
          * i.e. as they are stored in the underlying package
          */
-        private final Iterator<CTSheet> sheetIterator;
+        final Iterator<XSSFSheetRef> sheetIterator;
 
         /**
          * Construct a new SheetIterator
          *
          * @param wb package part holding workbook.xml
          */
-        private SheetIterator(PackagePart wb) throws IOException {
+        SheetIterator(PackagePart wb) throws IOException {
 
             /**
              * The order of sheets is defined by the order of CTSheet elements in workbook.xml
@@ -228,25 +232,44 @@ public class XSSFReader {
                         sheetMap.put(rel.getId(), pkg.getPart(relName));
                     }
                 }
-                //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
-                //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
-                CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
-                List<CTSheet> validSheets = new ArrayList<CTSheet>();
-                for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
-                    //if there's no relationship id, silently skip the sheet
-                    String sheetId = ctSheet.getId();
-                    if (sheetId != null && sheetId.length() > 0) {
-                        validSheets.add(ctSheet);
-                    }
-                }
-                sheetIterator = validSheets.iterator();
+                //step 2. Read array of CTSheet elements, wrap it in a LinkedList
+                //and construct an iterator
+                sheetIterator = createSheetIteratorFromWB(wb);
             } catch (InvalidFormatException e){
                 throw new POIXMLException(e);
-            } catch (XmlException e){
+            }
+        }
+
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+
+            XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+            XMLReader xmlReader = null;
+            try {
+                xmlReader = SAXHelper.newXMLReader();
+            } catch (ParserConfigurationException e) {
+                throw new POIXMLException(e);
+            } catch (SAXException e) {
                 throw new POIXMLException(e);
             }
+            xmlReader.setContentHandler(xmlSheetRefReader);
+            try {
+                xmlReader.parse(new InputSource(wb.getInputStream()));
+            } catch (SAXException e) {
+                throw new POIXMLException(e);
+            }
+
+            List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
+            for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
+                //if there's no relationship id, silently skip the sheet
+                String sheetId = xssfSheetRef.getId();
+                if (sheetId != null && sheetId.length() > 0) {
+                    validSheets.add(xssfSheetRef);
+                }
+            }
+            return validSheets.iterator();
         }
 
+
         /**
          * Returns <tt>true</tt> if the iteration has more elements.
          *
@@ -264,9 +287,9 @@ public class XSSFReader {
          */
         @Override
         public InputStream next() {
-            ctSheet = sheetIterator.next();
+            xssfSheetRef = sheetIterator.next();
 
-            String sheetId = ctSheet.getId();
+            String sheetId = xssfSheetRef.getId();
             try {
                 PackagePart sheetPkg = sheetMap.get(sheetId);
                 return sheetPkg.getInputStream();
@@ -281,7 +304,7 @@ public class XSSFReader {
          * @return name of the current sheet
          */
         public String getSheetName() {
-            return ctSheet.getName();
+            return xssfSheetRef.getName();
         }
         
         /**
@@ -344,7 +367,7 @@ public class XSSFReader {
         }
         
         public PackagePart getSheetPart() {
-           String sheetId = ctSheet.getId();
+           String sheetId = xssfSheetRef.getId();
            return sheetMap.get(sheetId);
         }
 
@@ -356,4 +379,52 @@ public class XSSFReader {
             throw new IllegalStateException("Not supported");
         }
     }
+
+    protected final static class XSSFSheetRef {
+        //do we need to store sheetId, too?
+        private final String id;
+        private final String name;
+
+        public XSSFSheetRef(String id, String name) {
+            this.id = id;
+            this.name = name;
+        }
+
+        public String getId() {
+            return id;
+        }
+
+        public String getName() {
+            return name;
+        }
+    }
+
+    //scrapes sheet reference info and order from workbook.xml
+    private static class XMLSheetRefReader extends DefaultHandler {
+        private final static String SHEET = "sheet";
+        private final static String ID = "id";
+        private final static String NAME = "name";
+
+        private final List<XSSFSheetRef> sheetRefs = new LinkedList();
+
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
+            if (localName.toLowerCase(Locale.US).equals(SHEET)) {
+                String name = null;
+                String id = null;
+                for (int i = 0; i < attrs.getLength(); i++) {
+                    if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
+                        name = attrs.getValue(i);
+                    } else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
+                        id = attrs.getValue(i);
+                    }
+                    sheetRefs.add(new XSSFSheetRef(id, name));
+                }
+            }
+        }
+
+        List<XSSFSheetRef> getSheetRefs() {
+            return Collections.unmodifiableList(sheetRefs);
+        }
+    }
 }
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
new file mode 100644 (file)
index 0000000..b3e667e
--- /dev/null
@@ -0,0 +1,160 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+/**
+ * Implementation of a text extractor for xlsb Excel
+ * files that uses SAX-like binary parsing.
+ */
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
+        implements org.apache.poi.ss.extractor.ExcelExtractor {
+
+    public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+            XSSFRelation.XLSB_BINARY_WORKBOOK
+    };
+
+    private boolean handleHyperlinksInCells = false;
+
+    public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+        super(path);
+    }
+
+    public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+        super(container);
+    }
+
+    /**
+     * Command line entry point: prints the extracted text of the given
+     * xlsb file to standard out.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  XSSFBEventBasedExcelExtractor <filename.xlsb>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+                new XSSFBEventBasedExcelExtractor(args[0]);
+        try {
+            System.out.println(extractor.getText());
+        } finally {
+            // release the extractor even if printing/extraction fails
+            extractor.close();
+        }
+    }
+
+    /**
+     * Sets whether a hyperlinks table is built for each sheet while
+     * {@link #getText()} runs. Default is false.
+     */
+    public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
+        this.handleHyperlinksInCells = handleHyperlinksInCells;
+    }
+
+    /**
+     * Should we return the formula itself, and not
+     * the result it produces? Default is false
+     * This is currently unsupported for xssfb
+     *
+     * @throws IllegalArgumentException always, regardless of the value passed
+     */
+    @Override
+    public void setFormulasNotResults(boolean formulasNotResults) {
+        throw new IllegalArgumentException("Not currently supported");
+    }
+
+    /**
+     * Processes the given sheet
+     *
+     * @param sheetContentsExtractor handler that receives the sheet contents
+     * @param styles styles table handed to the sheet handler
+     * @param comments comments table handed to the sheet handler, may be null
+     * @param strings shared strings table handed to the sheet handler
+     * @param sheetInputStream stream of the sheet to parse; not closed here
+     */
+    public void processSheet(
+            SheetContentsHandler sheetContentsExtractor,
+            XSSFBStylesTable styles,
+            XSSFBCommentsTable comments,
+            XSSFBSharedStringsTable strings,
+            InputStream sheetInputStream)
+            throws IOException, SAXException {
+
+        DataFormatter formatter;
+        if (locale == null) {
+            formatter = new DataFormatter();
+        } else {
+            formatter = new DataFormatter(locale);
+        }
+
+        XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
+                sheetInputStream,
+                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
+        );
+        xssfbSheetHandler.parse();
+    }
+
+    /**
+     * Processes the file and returns the text
+     *
+     * @return the extracted text, or <code>null</code> if an
+     *  IOException, SAXException or OpenXML4JException occurred
+     *  (the exception is printed to standard error)
+     */
+    @Override
+    public String getText() {
+        try {
+            XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
+            XSSFBReader xssfbReader = new XSSFBReader(container);
+            XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
+            XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
+
+            StringBuffer text = new StringBuffer();
+            SheetTextExtractor sheetExtractor = new SheetTextExtractor();
+            // NOTE(review): the hyperlinks table is built below but never read
+            // in this method, so handleHyperlinksInCells currently has no
+            // effect on the returned text.
+            XSSFBHyperlinksTable hyperlinksTable = null;
+            while (iter.hasNext()) {
+                InputStream stream = iter.next();
+                try {
+                    if (includeSheetNames) {
+                        text.append(iter.getSheetName());
+                        text.append('\n');
+                    }
+                    if (handleHyperlinksInCells) {
+                        hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
+                    }
+                    XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
+                    processSheet(sheetExtractor, styles, comments, strings, stream);
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendHeaderText(text);
+                    }
+                    sheetExtractor.appendCellText(text);
+                    if (includeTextBoxes) {
+                        processShapes(iter.getShapes(), text);
+                    }
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendFooterText(text);
+                    }
+                    sheetExtractor.reset();
+                } finally {
+                    // always release the sheet stream, even when parsing fails
+                    stream.close();
+                }
+            }
+
+            return text.toString();
+        } catch (IOException e) {
+            System.err.println(e);
+            return null;
+        } catch (SAXException se) {
+            System.err.println(se);
+            return null;
+        } catch (OpenXML4JException o4je) {
+            System.err.println(o4je);
+            return null;
+        }
+    }
+
+}
index e49c11c2ead7dc166c779455d40d5be56a18b36f..2cfa099d9d64e022bcd114d1005658f1956b68a7 100644 (file)
@@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
  */
 public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor 
        implements org.apache.poi.ss.extractor.ExcelExtractor {
-    private OPCPackage container;
+    OPCPackage container;
     private POIXMLProperties properties;
 
-    private Locale locale;
-    private boolean includeTextBoxes = true;
-    private boolean includeSheetNames = true;
-    private boolean includeCellComments = false;
-    private boolean includeHeadersFooters = true;
-    private boolean formulasNotResults = false;
+    Locale locale;
+    boolean includeTextBoxes = true;
+    boolean includeSheetNames = true;
+    boolean includeCellComments = false;
+    boolean includeHeadersFooters = true;
+    boolean formulasNotResults = false;
     private boolean concatenatePhoneticRuns = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
        }
    }
    
-    private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
+    void processShapes(List<XSSFShape> shapes, StringBuffer text) {
         if (shapes == null){
             return;
         }
@@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
          * @see XSSFExcelExtractor#getText()
          * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
          */
-        private void appendHeaderText(StringBuffer buffer) {
+        void appendHeaderText(StringBuffer buffer) {
             appendHeaderFooterText(buffer, "firstHeader");
             appendHeaderFooterText(buffer, "oddHeader");
             appendHeaderFooterText(buffer, "evenHeader");
@@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
          * @see XSSFExcelExtractor#getText()
          * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
          */
-        private void appendFooterText(StringBuffer buffer) {
+        void appendFooterText(StringBuffer buffer) {
             // append the text for each footer type in the same order
             // they are appended in XSSFExcelExtractor
             appendHeaderFooterText(buffer, "firstFooter");
@@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
         /**
          * Append the cell contents we have collected.
          */
-        private void appendCellText(StringBuffer buffer) {
+        void appendCellText(StringBuffer buffer) {
             checkMaxTextSize(buffer, output.toString());
             buffer.append(output);
         }
@@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
         /**
          * Reset this <code>SheetTextExtractor</code> for the next sheet.
          */
-        private void reset() {
+        void reset() {
             output.setLength(0);
             firstCellOfRow = true;
             if (headerFooterMap != null) {
index 0d7bc5a8a34cf52153e0d2df00d0316d590cb366..8405447c00af00c86c39032619bf0e325cf64440 100644 (file)
@@ -68,6 +68,7 @@ public class TestExtractorFactory {
     private static File xlsxStrict;
     private static File xltx;
     private static File xlsEmb;
+    private static File xlsb;
 
     private static File doc;
     private static File doc6;
@@ -108,6 +109,7 @@ public class TestExtractorFactory {
         xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
         xltx = getFileAndCheck(ssTests, "test.xltx");
         xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+        xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
 
         POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
         doc = getFileAndCheck(wpTests, "SampleDoc.doc");
@@ -172,6 +174,13 @@ public class TestExtractorFactory {
         );
         extractor.close();
 
+        extractor = ExtractorFactory.createExtractor(xlsb);
+        assertTrue(
+                extractor.getText().contains("test")
+        );
+        extractor.close();
+
+
         extractor = ExtractorFactory.createExtractor(xltx);
         assertTrue(
                 extractor.getText().contains("test")
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java
new file mode 100644 (file)
index 0000000..7bf1cf3
--- /dev/null
@@ -0,0 +1,56 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.junit.Test;
+
+public class TestXSSFBSharedStringsTable {
+
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    /**
+     * Reads the shared strings part of 51519.xlsb and checks the number of
+     * items, two sample entries and the count / unique count values.
+     */
+    @Test
+    public void testBasic() throws Exception {
+
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
+        try {
+            List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
+            assertEquals(1, parts.size());
+
+            XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
+            List<String> strings = rtbl.getItems();
+            assertEquals(49, strings.size());
+
+            assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
+            assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
+            assertEquals(55, rtbl.getCount());
+            assertEquals(49, rtbl.getUniqueCount());
+
+            //TODO: add in tests for phonetic runs
+        } finally {
+            // release the package even when an assertion fails
+            pkg.close();
+        }
+
+    }
+
+
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java
new file mode 100644 (file)
index 0000000..992517d
--- /dev/null
@@ -0,0 +1,54 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.junit.Test;
+
+public class TestXSSFBSheetHyperlinkManager {
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    /**
+     * Builds the hyperlinks table for the first sheet of hyperlink.xlsb and
+     * checks the single hyperlink record registered for cell (0, 0).
+     */
+    @Test
+    public void testBasic() throws Exception {
+
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
+        try {
+            XSSFBReader reader = new XSSFBReader(pkg);
+            XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
+            it.next();
+            XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
+            List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
+            assertNotNull(records);
+            assertEquals(1, records.size());
+            XSSFHyperlinkRecord record = records.get(0);
+            assertEquals("http://tika.apache.org/", record.getLocation());
+            assertEquals("rId2", record.getRelId());
+        } finally {
+            // release the package even when an assertion fails
+            pkg.close();
+        }
+
+    }
+
+
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
new file mode 100644 (file)
index 0000000..57e1e83
--- /dev/null
@@ -0,0 +1,224 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.eventusermodel;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.junit.Test;
+
+public class TestXSSFBReader {
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    /**
+     * Checks formatted cell values, comments and header/footer text
+     * reported for testVarious.xlsb.
+     */
+    @Test
+    public void testBasic() throws Exception {
+        List<String> sheetTexts = getSheets("testVarious.xlsb");
+
+        assertEquals(1, sheetTexts.size());
+        String xsxml = sheetTexts.get(0);
+        assertContains("This is a string", xsxml);
+        assertContains("<td ref=\"B2\">13</td>", xsxml);
+        assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
+        assertContains("<td ref=\"B4\">$   3.03</td>", xsxml);
+        assertContains("<td ref=\"B5\">20%</td>", xsxml);
+        assertContains("<td ref=\"B6\">13.12</td>", xsxml);
+        assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
+        assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);
+
+        assertContains("46/1963", xsxml);//custom format 1
+        assertContains("3/128", xsxml);//custom format 2
+
+        assertContains("<tr num=\"7>\n" +
+                "\t<td ref=\"A8\">longer int</td>\n" +
+                "\t<td ref=\"B8\">1.23457E+15</td>\n" +
+                "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "test comment2</span></td>\n" +
+                "</tr num=\"7>", xsxml);
+
+        assertContains("<tr num=\"34>\n" +
+                "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment6 actually in cell</span></td>\n" +
+                "</tr num=\"34>", xsxml);
+
+        assertContains("<tr num=\"64>\n" +
+                "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment7 end of file</span></td>\n" +
+                "</tr num=\"64>", xsxml);
+
+        assertContains("<tr num=\"65>\n" +
+                "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment8 end of file</span></td>\n" +
+                "</tr num=\"65>", xsxml);
+
+        assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
+        assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
+        assertContains(
+                "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
+                xsxml);
+        assertContains(
+                "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
+                xsxml);
+        assertContains(
+                "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
+                xsxml);
+        assertContains(
+                "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
+                xsxml);
+
+    }
+
+    /**
+     * Checks that comments of comments.xlsb are reported together with
+     * the cells of their rows.
+     */
+    @Test
+    public void testComments() throws Exception {
+        List<String> sheetTexts = getSheets("comments.xlsb");
+        String xsxml = sheetTexts.get(0);
+        assertContains(
+                "<tr num=\"0>\n" +
+                        "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
+                        "\t<td ref=\"B1\">row1</td>\n" +
+                        "</tr num=\"0>",  xsxml);
+        assertContains(
+                "<tr num=\"1>\n" +
+                        "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                        "comment row2 (index1)</span></td>\n" +
+                        "</tr num=\"1>",
+                xsxml);
+        assertContains("<tr num=\"2>\n" +
+                "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
+                "\t<td ref=\"B3\">row3</td>\n", xsxml);
+
+        assertContains("<tr num=\"3>\n" +
+                "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
+                "\t<td ref=\"B4\">row4</td>\n" +
+                "</tr num=\"3></sheet>", xsxml);
+
+    }
+
+    /**
+     * Parses every sheet of the given sample file with a
+     * {@link TestSheetHandler} and returns one rendered string per sheet.
+     * The package and every sheet stream are closed before returning.
+     */
+    private List<String> getSheets(String testFileName) throws Exception {
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
+        try {
+            List<String> sheetTexts = new ArrayList<String>();
+            XSSFBReader r = new XSSFBReader(pkg);
+
+            XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
+            assertNotNull(xssfbStylesTable);
+            XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
+            XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData();
+
+            while (it.hasNext()) {
+                InputStream is = it.next();
+                try {
+                    String name = it.getSheetName();
+                    TestSheetHandler testSheetHandler = new TestSheetHandler();
+                    testSheetHandler.startSheet(name);
+                    XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
+                            xssfbStylesTable,
+                            it.getXSSFBSheetComments(),
+                            sst, testSheetHandler,
+                            new DataFormatter(),
+                            false);
+                    sheetHandler.parse();
+                    testSheetHandler.endSheet();
+                    sheetTexts.add(testSheetHandler.toString());
+                } finally {
+                    // release the sheet stream even if parsing fails
+                    is.close();
+                }
+            }
+            return sheetTexts;
+        } finally {
+            // release the package even if a sheet could not be parsed
+            pkg.close();
+        }
+
+    }
+
+    //This converts all [\r\n\t]+ to " "
+    private void assertContains(String needle, String haystack) {
+        needle = needle.replaceAll("[\r\n\t]+", " ");
+        haystack = haystack.replaceAll("[\r\n\t]+", " ");
+        if (!haystack.contains(needle)) {
+            fail("couldn't find >"+needle+"< in: "+haystack );
+        }
+    }
+
+
+    /**
+     * Checks the formatted date value reported for date.xlsb.
+     */
+    @Test
+    public void testDate() throws Exception {
+        List<String> sheets = getSheets("date.xlsb");
+        assertEquals(1, sheets.size());
+        assertContains("1/12/13", sheets.get(0));
+
+    }
+
+
+    /**
+     * Renders the sheet events it receives into a simple markup string
+     * that the tests above match against. It needs no state from the
+     * enclosing test, hence static.
+     */
+    private static class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
+        private final StringBuilder sb = new StringBuilder();
+
+        public void startSheet(String sheetName) {
+            sb.append("<sheet name=\"").append(sheetName).append(">");
+        }
+
+        public void endSheet(){
+            sb.append("</sheet>");
+        }
+        @Override
+        public void startRow(int rowNum) {
+            sb.append("\n<tr num=\"").append(rowNum).append(">");
+        }
+
+        @Override
+        public void endRow(int rowNum) {
+            sb.append("\n</tr num=\"").append(rowNum).append(">");
+        }
+
+        @Override
+        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
+            // a missing value is rendered as an empty cell
+            formattedValue = (formattedValue == null) ? "" : formattedValue;
+            if (comment == null) {
+                sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
+            } else {
+                sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
+                        .append(formattedValue)
+                        .append("<span type=\"comment\" author=\"")
+                        .append(comment.getAuthor()).append("\">")
+                        .append(comment.getString().toString().trim()).append("</span>")
+                        .append("</td>");
+            }
+        }
+
+        @Override
+        public void headerFooter(String text, boolean isHeader, String tagName) {
+            if (isHeader) {
+                sb.append("<header tagName=\"").append(tagName).append("\">").append(text).append("</header>");
+            } else {
+                sb.append("<footer tagName=\"").append(tagName).append("\">").append(text).append("</footer>");
+            }
+        }
+
+        @Override
+        public String toString() {
+            return sb.toString();
+        }
+    }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java
new file mode 100644 (file)
index 0000000..da38882
--- /dev/null
@@ -0,0 +1,102 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.junit.Test;
+
+/**
+ * Tests for {@link XSSFBEventBasedExcelExtractor}
+ */
+public class TestXSSFBEventBasedExcelExtractor {
+
+
+    /**
+     * Opens the named sample package and wraps it in an
+     * {@link XSSFBEventBasedExcelExtractor}.
+     */
+    protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+        return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
+                openSamplePackage(sampleName));
+    }
+
+    /**
+     * Get text out of the simple file
+     */
+    @Test
+    public void testGetSimpleText() throws Exception {
+        // a very simple file
+        XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
+        try {
+            extractor.setIncludeCellComments(true);
+            // NOTE(review): the result of this first call is discarded;
+            // presumably it checks that getText() can be called repeatedly - confirm
+            extractor.getText();
+
+            String text = extractor.getText();
+            assertTrue(text.length() > 0);
+
+            // Check sheet names
+            assertTrue(text.startsWith("Sheet1"));
+            assertTrue(text.endsWith("Sheet3\n"));
+
+            // Now without sheet names, only the cell text is expected
+            extractor.setIncludeSheetNames(false);
+            text = extractor.getText();
+            String CHUNK1 =
+                    "Lorem\t111\n" +
+                            "ipsum\t222\n" +
+                            "dolor\t333\n" +
+                            "sit\t444\n" +
+                            "amet\t555\n" +
+                            "consectetuer\t666\n" +
+                            "adipiscing\t777\n" +
+                            "elit\t888\n" +
+                            "Nunc\t999\n";
+            String CHUNK2 =
+                    "The quick brown fox jumps over the lazy dog\n" +
+                            "hello, xssf   hello, xssf\n" +
+                            "hello, xssf   hello, xssf\n" +
+                            "hello, xssf   hello, xssf\n" +
+                            "hello, xssf   hello, xssf\n";
+            assertEquals(
+                    CHUNK1 +
+                            "at\t4995\n" +
+                            CHUNK2
+                    , text);
+        } finally {
+            // close the extractor even when an assertion fails, as testShapes does
+            extractor.close();
+        }
+
+    }
+
+
+    /**
+     * Test text extraction from text box using getShapes()
+     *
+     * @throws Exception
+     */
+    @Test
+    public void testShapes() throws Exception {
+        XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
+
+        try {
+            String text = ooxmlExtractor.getText();
+
+            assertTrue(text.contains("Line 1"));
+            assertTrue(text.contains("Line 2"));
+            assertTrue(text.contains("Line 3"));
+        } finally {
+            ooxmlExtractor.close();
+        }
+    }
+
+}
diff --git a/test-data/spreadsheet/51519.xlsb b/test-data/spreadsheet/51519.xlsb
new file mode 100644 (file)
index 0000000..54876cd
Binary files /dev/null and b/test-data/spreadsheet/51519.xlsb differ
diff --git a/test-data/spreadsheet/WithTextBox.xlsb b/test-data/spreadsheet/WithTextBox.xlsb
new file mode 100644 (file)
index 0000000..558395d
Binary files /dev/null and b/test-data/spreadsheet/WithTextBox.xlsb differ
diff --git a/test-data/spreadsheet/comments.xlsb b/test-data/spreadsheet/comments.xlsb
new file mode 100644 (file)
index 0000000..881e51c
Binary files /dev/null and b/test-data/spreadsheet/comments.xlsb differ
diff --git a/test-data/spreadsheet/date.xlsb b/test-data/spreadsheet/date.xlsb
new file mode 100644 (file)
index 0000000..d47d602
Binary files /dev/null and b/test-data/spreadsheet/date.xlsb differ
diff --git a/test-data/spreadsheet/hyperlink.xlsb b/test-data/spreadsheet/hyperlink.xlsb
new file mode 100644 (file)
index 0000000..2a08936
Binary files /dev/null and b/test-data/spreadsheet/hyperlink.xlsb differ
diff --git a/test-data/spreadsheet/sample.xlsb b/test-data/spreadsheet/sample.xlsb
new file mode 100644 (file)
index 0000000..676b4da
Binary files /dev/null and b/test-data/spreadsheet/sample.xlsb differ
diff --git a/test-data/spreadsheet/testVarious.xlsb b/test-data/spreadsheet/testVarious.xlsb
new file mode 100644 (file)
index 0000000..22cc9b4
Binary files /dev/null and b/test-data/spreadsheet/testVarious.xlsb differ