import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFRelation;
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
}
+ // How about xlsb?
+ for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
+ if (rel.getContentType().equals(contentType)) {
+ return new XSSFBEventBasedExcelExtractor(pkg);
+ }
+ }
+
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
} catch (IOException e) {
import org.apache.poi.UnsupportedFileFormatException;
/**
- * We don't support .xlsb files, sorry
+ * We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}.
+ * As of POI 3.15-beta3, we do support streaming reading of xlsb files
+ * via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader}
*/
public class XLSBUnsupportedException extends UnsupportedFileFormatException {
private static final long serialVersionUID = 7849681804154571175L;
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellReference;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * This class encapsulates what the spec calls a "Cell" object.
+ * I added "Header" to clarify that this does not contain the contents
+ * of the cell, only the column number, the style id and the phonetic boolean
+ */
+@Internal
+class XSSFBCellHeader {
+ public static int length = 8;
+
+ /**
+ *
+ * @param data raw data
+ * @param offset offset at which to start reading the record
+ * @param currentRow 0-based current row count
+ * @param cell cell buffer to update
+ */
+ public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) {
+ long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+ int styleIdx = XSSFBUtils.get24BitInt(data, offset); offset += 3;
+ //TODO: range checking
+ boolean showPhonetic = false;//TODO: fill this out
+ cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic);
+ }
+
+ private int rowNum;
+ private int colNum;
+ private int styleIdx;
+ private boolean showPhonetic;
+
+ public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) {
+ this.rowNum = rowNum;
+ this.colNum = colNum;
+ this.styleIdx = styleIdx;
+ this.showPhonetic = showPhonetic;
+ }
+
+ int getColNum() {
+ return colNum;
+ }
+
+ String formatAddressAsString() {
+ return CellReference.convertNumToColString(colNum)+(rowNum+1);
+ }
+
+ int getStyleIdx() {
+ return styleIdx;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+class XSSFBCellRange {
+
+ public final static int length = 4* LittleEndian.INT_SIZE;
+ /**
+ * Parses an RfX cell range from the data starting at the offset.
+ * This performs no range checking.
+ * @param data raw bytes
+ * @param offset offset at which to start reading from data
+ * @param cellRange to overwrite. If null, a new cellRange will be created.
+ * @return a mutable cell range.
+ */
+ public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) {
+ if (cellRange == null) {
+ cellRange = new XSSFBCellRange();
+ }
+ cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+ cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+ cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
+ cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset));
+
+ return cellRange;
+ }
+
+ int firstRow;
+ int lastRow;
+ int firstCol;
+ int lastCol;
+
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import org.apache.poi.ss.usermodel.ClientAnchor;
+import org.apache.poi.ss.usermodel.RichTextString;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+
+@Internal
+class XSSFBComment extends XSSFComment {
+
+ private final CellAddress cellAddress;
+ private final String author;
+ private final XSSFBRichTextString comment;
+ private boolean visible = true;
+
+ XSSFBComment(CellAddress cellAddress, String author, String comment) {
+ super(null, null, null);
+ this.cellAddress = cellAddress;
+ this.author = author;
+ this.comment = new XSSFBRichTextString(comment);
+ }
+
+ @Override
+ public void setVisible(boolean visible) {
+ throw new IllegalArgumentException("XSSFBComment is read only.");
+ }
+
+ @Override
+ public boolean isVisible() {
+ return visible;
+ }
+
+ @Override
+ public CellAddress getAddress() {
+ return cellAddress;
+ }
+
+ @Override
+ public void setAddress(CellAddress addr) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+ }
+
+ @Override
+ public void setAddress(int row, int col) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+
+ }
+
+ @Override
+ public int getRow() {
+ return cellAddress.getRow();
+ }
+
+ @Override
+ public void setRow(int row) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+ }
+
+ @Override
+ public int getColumn() {
+ return cellAddress.getColumn();
+ }
+
+ @Override
+ public void setColumn(int col) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+ }
+
+ @Override
+ public String getAuthor() {
+ return author;
+ }
+
+ @Override
+ public void setAuthor(String author) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+ }
+
+ @Override
+ public XSSFBRichTextString getString() {
+ return comment;
+ }
+
+ @Override
+ public void setString(RichTextString string) {
+ throw new IllegalArgumentException("XSSFBComment is read only");
+ }
+
+ @Override
+ public ClientAnchor getClientAnchor() {
+ return null;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.TreeMap;
+
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+public class XSSFBCommentsTable extends XSSFBParser {
+
+ private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//String is the cellAddress A1
+ private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>();
+ private List<String> authors = new ArrayList<String>();
+
+ //these are all used only during parsing, and they are mutable!
+ private int authorId = -1;
+ private CellAddress cellAddress = null;
+ private XSSFBCellRange cellRange = null;
+ private String comment = null;
+ private StringBuilder authorBuffer = new StringBuilder();
+
+
+ public XSSFBCommentsTable(InputStream is) throws IOException {
+ super(is);
+ parse();
+ commentAddresses.addAll(comments.keySet());
+ }
+
+ @Override
+ public void handleRecord(int id, byte[] data) throws XSSFBParseException {
+ XSSFBRecordType recordType = XSSFBRecordType.lookup(id);
+ switch (recordType) {
+ case BrtBeginComment:
+ int offset = 0;
+ authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data)); offset += LittleEndian.INT_SIZE;
+ cellRange = XSSFBCellRange.parse(data, offset, cellRange);
+ offset+= XSSFBCellRange.length;
+ //for strict parsing; confirm that firstRow==lastRow and firstCol==colLats (2.4.28)
+ cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol);
+ break;
+ case BrtCommentText:
+ XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0);
+ comment = xssfbRichStr.getString();
+ break;
+ case BrtEndComment:
+ comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment));
+ authorId = -1;
+ cellAddress = null;
+ break;
+ case BrtCommentAuthor:
+ authorBuffer.setLength(0);
+ XSSFBUtils.readXLWideString(data, 0, authorBuffer);
+ authors.add(authorBuffer.toString());
+ break;
+ }
+ }
+
+
+ public Queue<CellAddress> getAddresses() {
+ return commentAddresses;
+ }
+
+ public XSSFBComment get(CellAddress cellAddress) {
+ if (cellAddress == null) {
+ return null;
+ }
+ return comments.get(cellAddress);
+ }
+
+ private final static class CellAddressComparator implements Comparator<CellAddress> {
+
+ @Override
+ public int compare(CellAddress o1, CellAddress o2) {
+ if (o1.getRow() < o2.getRow()) {
+ return -1;
+ } else if (o1.getRow() > o2.getRow()) {
+ return 1;
+ }
+ if (o1.getColumn() < o2.getColumn()) {
+ return -1;
+ } else if (o1.getColumn() > o2.getColumn()) {
+ return 1;
+ }
+ return 0;
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
+
+@Internal
+class XSSFBHeaderFooter {
+ private final String headerFooterTypeLabel;
+ private final boolean isHeader;
+ private String rawString;
+ private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper();
+
+
+ XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) {
+ this.headerFooterTypeLabel = headerFooterTypeLabel;
+ this.isHeader = isHeader;
+ }
+
+ String getHeaderFooterTypeLabel() {
+ return headerFooterTypeLabel;
+ }
+
+ String getRawString() {
+ return rawString;
+ }
+
+ String getString() {
+ StringBuilder sb = new StringBuilder();
+ String left = headerFooterHelper.getLeftSection(rawString);
+ String center = headerFooterHelper.getCenterSection(rawString);
+ String right = headerFooterHelper.getRightSection(rawString);
+ if (left != null && left.length() > 0) {
+ sb.append(left);
+ }
+ if (center != null && center.length() > 0) {
+ if (sb.length() > 0) {
+ sb.append(" ");
+ }
+ sb.append(center);
+ }
+ if (right != null && right.length() > 0) {
+ if (sb.length() > 0) {
+ sb.append(" ");
+ }
+ sb.append(right);
+ }
+ return sb.toString();
+ }
+
+ void setRawString(String rawString) {
+ this.rawString = rawString;
+ }
+
+ boolean isHeader() {
+ return isHeader;
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+class XSSFBHeaderFooters {
+
+ public static XSSFBHeaderFooters parse(byte[] data) {
+ boolean diffOddEven = false;
+ boolean diffFirst = false;
+ boolean scaleWDoc = false;
+ boolean alignMargins = false;
+
+ int offset = 2;
+ XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters();
+ xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true);
+ xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false);
+ xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true);
+ xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false);
+ xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true);
+ xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false);
+ offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header);
+ offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer);
+ offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven);
+ offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven);
+ offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst);
+ readHeaderFooter(data, offset, xssfbHeaderFooter.footerFirst);
+ return xssfbHeaderFooter;
+ }
+
+ private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) {
+ if (offset + 4 >= data.length) {
+ return 0;
+ }
+ StringBuilder sb = new StringBuilder();
+ int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb);
+ headerFooter.setRawString(sb.toString());
+ return bytesRead;
+ }
+
+ private XSSFBHeaderFooter header;
+ private XSSFBHeaderFooter footer;
+ private XSSFBHeaderFooter headerEven;
+ private XSSFBHeaderFooter footerEven;
+ private XSSFBHeaderFooter headerFirst;
+ private XSSFBHeaderFooter footerFirst;
+
+ public XSSFBHeaderFooter getHeader() {
+ return header;
+ }
+
+ public XSSFBHeaderFooter getFooter() {
+ return footer;
+ }
+
+ public XSSFBHeaderFooter getHeaderEven() {
+ return headerEven;
+ }
+
+ public XSSFBHeaderFooter getFooterEven() {
+ return footerEven;
+ }
+
+ public XSSFBHeaderFooter getHeaderFirst() {
+ return headerFirst;
+ }
+
+ public XSSFBHeaderFooter getFooterFirst() {
+ return footerFirst;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.ss.util.CellRangeUtil;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+@Internal
+public class XSSFBHyperlinksTable {
+
+ private final static BitSet RECORDS = new BitSet();
+
+
+ static {
+ RECORDS.set(XSSFBRecordType.BrtHLink.getId());
+ }
+
+
+ private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>();
+
+ //cache the relId to hyperlink url from the sheet's .rels
+ private Map<String, String> relIdToHyperlink = new HashMap<String, String>();
+
+ public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException {
+ //load the urls from the sheet .rels
+ loadUrlsFromSheetRels(sheetPart);
+ //now load the hyperlinks from the bottom of the sheet
+ HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetPart.getInputStream());
+ scraper.parse();
+ }
+
+ /**
+ *
+ * @return a map of the hyperlinks. The key is the top left cell address in their CellRange
+ */
+ public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() {
+ Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap =
+ new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator());
+ for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) {
+ CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(),
+ hyperlinkRecord.getCellRangeAddress().getFirstColumn());
+ List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress);
+ if (list == null) {
+ list = new ArrayList<XSSFHyperlinkRecord>();
+ }
+ list.add(hyperlinkRecord);
+ hyperlinkMap.put(cellAddress, list);
+ }
+ return hyperlinkMap;
+ }
+
+
+ /**
+ *
+ * @param cellAddress cell address to find
+ * @return null if not a hyperlink
+ */
+ public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) {
+ List<XSSFHyperlinkRecord> overlapping = null;
+ CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(),
+ cellAddress.getRow(),
+ cellAddress.getColumn(),
+ cellAddress.getColumn());
+ for (XSSFHyperlinkRecord record : hyperlinkRecords) {
+ if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) {
+ if (overlapping == null) {
+ overlapping = new ArrayList<XSSFHyperlinkRecord>();
+ }
+ overlapping.add(record);
+ }
+ }
+ return overlapping;
+ }
+
+ private void loadUrlsFromSheetRels(PackagePart sheetPart) {
+ try {
+ for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
+ relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString());
+ }
+ } catch (InvalidFormatException e) {
+ //swallow
+ }
+ }
+
+ private class HyperlinkSheetScraper extends XSSFBParser {
+
+ private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange();
+ private final StringBuilder xlWideStringBuffer = new StringBuilder();
+
+ HyperlinkSheetScraper(InputStream is) {
+ super(is, RECORDS);
+ }
+
+ @Override
+ public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+ if (recordType != XSSFBRecordType.BrtHLink.getId()) {
+ return;
+ }
+ int offset = 0;
+ String relId = "";
+ String location = "";
+ String toolTip = "";
+ String display = "";
+
+ hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange);
+ offset += XSSFBCellRange.length;
+ xlWideStringBuffer.setLength(0);
+ offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer);
+ relId = xlWideStringBuffer.toString();
+ xlWideStringBuffer.setLength(0);
+ offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
+ location = xlWideStringBuffer.toString();
+ xlWideStringBuffer.setLength(0);
+ offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
+ toolTip = xlWideStringBuffer.toString();
+ xlWideStringBuffer.setLength(0);
+ offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
+ display = xlWideStringBuffer.toString();
+ CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol);
+
+ String url = relIdToHyperlink.get(relId);
+ if (location == null || location.length() == 0) {
+ location = url;
+ }
+
+ hyperlinkRecords.add(
+ new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display)
+ );
+ }
+ }
+
+ private static class TopLeftCellAddressComparator implements Comparator<CellAddress> {
+
+ @Override
+ public int compare(CellAddress o1, CellAddress o2) {
+ if (o1.getRow() < o2.getRow()) {
+ return -1;
+ } else if (o1.getRow() > o2.getRow()) {
+ return 1;
+ }
+ if (o1.getColumn() < o2.getColumn()) {
+ return -1;
+ } else if (o1.getColumn() > o2.getColumn()) {
+ return 1;
+ }
+ return 0;
+ }
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+/**
+ * Parse exception while reading an xssfb
+ */
+public class XSSFBParseException extends RuntimeException {
+
+ public XSSFBParseException(String msg) {
+ super(msg);
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.BitSet;
+
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndianInputStream;
+
+/**
+ * Experimental parser for Microsoft's ooxml xssfb format.
+ * Not thread safe, obviously. Need to create a new one
+ * for each thread.
+ */
+@Internal
+public abstract class XSSFBParser {
+
+ private final LittleEndianInputStream is;
+ private final BitSet records;
+
+ public XSSFBParser(InputStream is) {
+ this.is = new LittleEndianInputStream(is);
+ records = null;
+ }
+
+ XSSFBParser(InputStream is, BitSet bitSet) {
+ this.is = new LittleEndianInputStream(is);
+ records = bitSet;
+ }
+
+ public void parse() throws IOException {
+
+ while (true) {
+ int bInt = is.read();
+ if (bInt == -1) {
+ return;
+ }
+ readNext((byte) bInt);
+ }
+ }
+
+ private void readNext(byte b1) throws IOException {
+ int recordId = 0;
+
+ //if highest bit == 1
+ if ((b1 >> 7 & 1) == 1) {
+ byte b2 = is.readByte();
+ b1 &= ~(1<<7); //unset highest bit
+ b2 &= ~(1<<7); //unset highest bit (if it exists?)
+ recordId = (128*(int)b2)+(int)b1;
+ } else {
+ recordId = (int)b1;
+ }
+
+ long recordLength = 0;
+ int i = 0;
+ boolean halt = false;
+ while (i < 4 && ! halt) {
+ byte b = is.readByte();
+ halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue
+ b &= ~(1<<7);
+ recordLength += (int)b << (i*7); //multiply by 128^i
+ i++;
+
+ }
+ if (records == null || records.get(recordId)) {
+ //add sanity check for length?
+ byte[] buff = new byte[(int) recordLength];
+ is.readFully(buff);
+ handleRecord(recordId, buff);
+ } else {
+ long length = is.skip(recordLength);
+ if (length != recordLength) {
+ throw new XSSFBParseException("End of file reached before expected.\t"+
+ "Tried to skip "+recordLength + ", but only skipped "+length);
+ }
+ }
+ }
+
+ //It hurts, hurts, hurts to create a new byte array for every record.
+ //However, on a large Excel spreadsheet, this parser was 1/3 faster than
+ //the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf.
+ //The code is far cleaner to have the parser read all
+ //of the data rather than having every component promise that it read
+ //the correct amount.
+ abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException;
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+public enum XSSFBRecordType {
+
+ BrtCellBlank(1),
+ BrtCellRk(2),
+ BrtCellError(3),
+ BrtCellBool(4),
+ BrtCellReal(5),
+ BrtCellSt(6),
+ BrtCellIsst(7),
+ BrtFmlaString(8),
+ BrtFmlaNum(9),
+ BrtFmlaBool(10),
+ BrtFmlaError(11),
+ BrtRowHdr(0),
+ BrtCellRString(62),
+ BrtBeginSheet(129),
+ BrtWsProp(147),
+ BrtWsDim(148),
+ BrtColInfo(60),
+ BrtBeginSheetData(145),
+ BrtEndSheetData(146),
+ BrtHLink(494),
+ BrtBeginHeaderFooter(479),
+
+ //comments
+ BrtBeginCommentAuthors(630),
+ BrtEndCommentAuthors(631),
+ BrtCommentAuthor(632),
+ BrtBeginComment(635),
+ BrtCommentText(637),
+ BrtEndComment(636),
+ //styles table
+ BrtXf(47),
+ BrtFmt(44),
+ BrtBeginFmts(615),
+ BrtEndFmts(616),
+ BrtBeginCellXFs(617),
+ BrtEndCellXFs(618),
+ BrtBeginCellStyleXFS(626),
+ BrtEndCellStyleXFS(627),
+
+ //stored strings table
+ BrtSstItem(19), //stored strings items
+ BrtBeginSst(159), //stored strings begin sst
+ BrtEndSst(160), //stored strings end sst
+
+ BrtBundleSh(156), //defines worksheet in wb part
+ Unimplemented(-1);
+
+
+ private final int id;
+
+ XSSFBRecordType(int id) {
+ this.id = id;
+ }
+
+ public int getId() {
+ return id;
+ }
+
+ public static XSSFBRecordType lookup(int id) {
+ for (XSSFBRecordType r : XSSFBRecordType.values()) {
+ if (r.id == id) {
+ return r;
+ }
+ }
+ return Unimplemented;
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import org.apache.poi.POIXMLDocumentPart;
+import org.apache.poi.POIXMLRelation;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
+
+/**
+ * Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation}
+ * because of conflicts with regular ooxml relations.
+ * If we failed to break this into a separate class, in the cases of SharedStrings and Styles,
+ * 2 parts would exist, and "Packages shall not contain equivalent part names..."
+ * <p>
+ * Also, we need to avoid the possibility of breaking the marshalling process for xml.
+ */
+@Internal
+public class XSSFBRelation extends POIXMLRelation {
+ private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class);
+
+ static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation(
+ "application/vnd.ms-excel.sharedStrings",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings",
+ "/xl/sharedStrings.bin",
+ null
+ );
+
+ public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation(
+ "application/vnd.ms-excel.styles",
+ PackageRelationshipTypes.STYLE_PART,
+ "/xl/styles.bin",
+ null
+ );
+
+ private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
+ super(type, rel, defaultName, cls);
+ }
+
+ /**
+ * Fetches the InputStream to read the contents, based
+ * of the specified core part, for which we are defined
+ * as a suitable relationship
+ */
+ public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
+ PackageRelationshipCollection prc =
+ corePart.getRelationshipsByType(getRelation());
+ Iterator<PackageRelationship> it = prc.iterator();
+ if (it.hasNext()) {
+ PackageRelationship rel = it.next();
+ PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
+ PackagePart part = corePart.getPackage().getPart(relName);
+ return part.getInputStream();
+ }
+ log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
+ return null;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.util.Internal;
+
+@Internal
+class XSSFBRichStr {
+
+ public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException {
+ byte first = bytes[offset];
+ boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1?
+ boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1?
+ StringBuilder sb = new StringBuilder();
+
+ int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb);
+ //TODO: parse phonetic strings.
+ return new XSSFBRichStr(sb.toString(), "");
+ }
+
+ private final String string;
+ private final String phoneticString;
+
+ XSSFBRichStr(String string, String phoneticString) {
+ this.string = string;
+ this.phoneticString = phoneticString;
+ }
+
+ public String getString() {
+ return string;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.usermodel.Font;
+import org.apache.poi.util.Internal;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+
+/**
+ * Wrapper class around String so that we can use it in Comment.
+ * Nothing has been implemented yet except for {@link #getString()}.
+ */
+@Internal
+class XSSFBRichTextString extends XSSFRichTextString {
+ private final String string;
+
+ XSSFBRichTextString(String string) {
+ this.string = string;
+ }
+
+ @Override
+ public void applyFont(int startIndex, int endIndex, short fontIndex) {
+
+ }
+
+ @Override
+ public void applyFont(int startIndex, int endIndex, Font font) {
+
+ }
+
+ @Override
+ public void applyFont(Font font) {
+
+ }
+
+ @Override
+ public void clearFormatting() {
+
+ }
+
+ @Override
+ public String getString() {
+ return string;
+ }
+
+ @Override
+ public int length() {
+ return string.length();
+ }
+
+ @Override
+ public int numFormattingRuns() {
+ return 0;
+ }
+
+ @Override
+ public int getIndexOfFormattingRun(int index) {
+ return 0;
+ }
+
+ @Override
+ public void applyFont(short fontIndex) {
+
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.binary;
+
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.xml.sax.SAXException;
+
+@Internal
+public class XSSFBSharedStringsTable {
+
+ /**
+ * An integer representing the total count of strings in the workbook. This count does not
+ * include any numbers, it counts only the total of text strings in the workbook.
+ */
+ private int count;
+
+ /**
+ * An integer representing the total count of unique strings in the Shared String Table.
+ * A string is unique even if it is a copy of another string, but has different formatting applied
+ * at the character level.
+ */
+ private int uniqueCount;
+
+ /**
+ * The shared strings table.
+ */
+ private List<String> strings = new ArrayList<String>();
+
+ /**
+ * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
+ * @throws IOException If reading the data from the package fails.
+ * @throws SAXException if parsing the XML data fails.
+ */
+ public XSSFBSharedStringsTable(OPCPackage pkg)
+ throws IOException, SAXException {
+ ArrayList<PackagePart> parts =
+ pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType());
+
+ // Some workbooks have no shared strings table.
+ if (parts.size() > 0) {
+ PackagePart sstPart = parts.get(0);
+
+ readFrom(sstPart.getInputStream());
+ }
+ }
+
+ /**
+ * Like POIXMLDocumentPart constructor
+ *
+ * @since POI 3.14-Beta3
+ */
+ XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException {
+ readFrom(part.getInputStream());
+ }
+
+ private void readFrom(InputStream inputStream) throws IOException {
+ SSTBinaryReader reader = new SSTBinaryReader(inputStream);
+ reader.parse();
+ }
+
+ public List<String> getItems() {
+ return strings;
+ }
+
+ public String getEntryAt(int i) {
+ return strings.get(i);
+ }
+
+ /**
+ * Return an integer representing the total count of strings in the workbook. This count does not
+ * include any numbers, it counts only the total of text strings in the workbook.
+ *
+ * @return the total count of strings in the workbook
+ */
+ public int getCount() {
+ return this.count;
+ }
+
+ /**
+ * Returns an integer representing the total count of unique strings in the Shared String Table.
+ * A string is unique even if it is a copy of another string, but has different formatting applied
+ * at the character level.
+ *
+ * @return the total count of unique strings in the workbook
+ */
+ public int getUniqueCount() {
+ return this.uniqueCount;
+ }
+
+ private class SSTBinaryReader extends XSSFBParser {
+
+ SSTBinaryReader(InputStream is) {
+ super(is);
+ }
+
+ @Override
+ public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+ XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
+
+ switch (type) {
+ case BrtSstItem:
+ XSSFBRichStr rstr = XSSFBRichStr.build(data, 0);
+ strings.add(rstr.getString());
+ break;
+ case BrtBeginSst:
+ count = (int) LittleEndian.getUInt(data,0);
+ uniqueCount = (int) LittleEndian.getUInt(data, 4);
+ break;
+ }
+
+ }
+ }
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import java.io.InputStream;
+import java.util.Queue;
+
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+
+@Internal
+public class XSSFBSheetHandler extends XSSFBParser {
+
+ private final static int CHECK_ALL_ROWS = -1;
+
+ private final XSSFBSharedStringsTable stringsTable;
+ private final XSSFSheetXMLHandler.SheetContentsHandler handler;
+ private final XSSFBStylesTable styles;
+ private final XSSFBCommentsTable comments;
+ private final DataFormatter dataFormatter;
+ private final boolean formulasNotResults;//TODO: implement this
+
+ private int lastEndedRow = -1;
+ private int lastStartedRow = -1;
+ private int currentRow = 0;
+ private byte[] rkBuffer = new byte[8];
+ private XSSFBCellRange hyperlinkCellRange = null;
+ private StringBuilder xlWideStringBuffer = new StringBuilder();
+
+ private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader();
+ public XSSFBSheetHandler(InputStream is,
+ XSSFBStylesTable styles,
+ XSSFBCommentsTable comments,
+ XSSFBSharedStringsTable strings,
+ XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
+ DataFormatter dataFormatter,
+ boolean formulasNotResults) {
+ super(is);
+ this.styles = styles;
+ this.comments = comments;
+ this.stringsTable = strings;
+ this.handler = sheetContentsHandler;
+ this.dataFormatter = dataFormatter;
+ this.formulasNotResults = formulasNotResults;
+ }
+
+ @Override
+ public void handleRecord(int id, byte[] data) throws XSSFBParseException {
+ XSSFBRecordType type = XSSFBRecordType.lookup(id);
+
+ switch(type) {
+ case BrtRowHdr:
+ long rw = LittleEndian.getUInt(data, 0);
+ if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec?
+ throw new XSSFBParseException("Row number beyond allowable range: "+rw);
+ }
+ currentRow = (int)rw;
+ checkMissedComments(currentRow);
+ startRow(currentRow);
+ break;
+ case BrtCellIsst:
+ handleBrtCellIsst(data);
+ break;
+ case BrtCellSt: //TODO: needs test
+ handleCellSt(data);
+ break;
+ case BrtCellRk:
+ handleCellRk(data);
+ break;
+ case BrtCellReal:
+ handleCellReal(data);
+ break;
+ case BrtCellBool:
+ handleBoolean(data);
+ break;
+ case BrtCellError:
+ handleCellError(data);
+ break;
+ case BrtCellBlank:
+ beforeCellValue(data);//read cell info and check for missing comments
+ break;
+ case BrtFmlaString:
+ handleFmlaString(data);
+ break;
+ case BrtFmlaNum:
+ handleFmlaNum(data);
+ break;
+ case BrtFmlaError:
+ handleFmlaError(data);
+ break;
+ //TODO: All the PCDI and PCDIA
+ case BrtEndSheetData:
+ checkMissedComments(CHECK_ALL_ROWS);
+ endRow(lastStartedRow);
+ break;
+ case BrtBeginHeaderFooter:
+ handleHeaderFooter(data);
+ break;
+ }
+ }
+
+
+ private void beforeCellValue(byte[] data) {
+ XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer);
+ checkMissedComments(currentRow, cellBuffer.getColNum());
+ }
+
+ private void handleCellValue(String formattedValue) {
+ CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum());
+ XSSFBComment comment = null;
+ if (comments != null) {
+ comment = comments.get(cellAddress);
+ }
+ handler.cell(cellAddress.formatAsString(), formattedValue, comment);
+ }
+
+ private void handleFmlaNum(byte[] data) {
+ beforeCellValue(data);
+ //xNum
+ double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
+ String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+ String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+ handleCellValue(formattedVal);
+ }
+
+ private void handleCellSt(byte[] data) {
+ beforeCellValue(data);
+ xlWideStringBuffer.setLength(0);
+ XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
+ handleCellValue(xlWideStringBuffer.toString());
+ }
+
+ private void handleFmlaString(byte[] data) {
+ beforeCellValue(data);
+ xlWideStringBuffer.setLength(0);
+ XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
+ handleCellValue(xlWideStringBuffer.toString());
+ }
+
+ private void handleCellError(byte[] data) {
+ beforeCellValue(data);
+ //TODO, read byte to figure out the type of error
+ handleCellValue("ERROR");
+ }
+
+ private void handleFmlaError(byte[] data) {
+ beforeCellValue(data);
+ //TODO, read byte to figure out the type of error
+ handleCellValue("ERROR");
+ }
+
+ private void handleBoolean(byte[] data) {
+ beforeCellValue(data);
+ String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE";
+ handleCellValue(formattedVal);
+ }
+
+ private void handleCellReal(byte[] data) {
+ beforeCellValue(data);
+ //xNum
+ double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
+ String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+ String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+ handleCellValue(formattedVal);
+ }
+
+ private void handleCellRk(byte[] data) {
+ beforeCellValue(data);
+ double val = rkNumber(data, XSSFBCellHeader.length);
+ String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
+ String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
+ handleCellValue(formattedVal);
+ }
+
+ private void handleBrtCellIsst(byte[] data) {
+ beforeCellValue(data);
+ long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length);
+ //check for out of range, buffer overflow
+
+ XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx));
+ handleCellValue(rtss.getString());
+ }
+
+
+ private void handleHeaderFooter(byte[] data) {
+ XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data);
+ outputHeaderFooter(headerFooter.getHeader());
+ outputHeaderFooter(headerFooter.getFooter());
+ outputHeaderFooter(headerFooter.getHeaderEven());
+ outputHeaderFooter(headerFooter.getFooterEven());
+ outputHeaderFooter(headerFooter.getHeaderFirst());
+ outputHeaderFooter(headerFooter.getFooterFirst());
+ }
+
+ private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) {
+ String text = headerFooter.getString();
+ if (text != null && text.trim().length() > 0) {
+ handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel());
+ }
+ }
+
+
+ //at start of next cell or end of row, return the cellAddress if it equals currentRow and col
+ private void checkMissedComments(int currentRow, int colNum) {
+ if (comments == null) {
+ return;
+ }
+ Queue<CellAddress> queue = comments.getAddresses();
+ while (queue.size() > 0) {
+ CellAddress cellAddress = queue.peek();
+ if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) {
+ cellAddress = queue.remove();
+ dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
+ } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) {
+ queue.remove();
+ return;
+ } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) {
+ return;
+ } else if (cellAddress.getRow() > currentRow) {
+ return;
+ }
+ }
+ }
+
+ //check for anything from rows before
+ private void checkMissedComments(int currentRow) {
+ if (comments == null) {
+ return;
+ }
+ Queue<CellAddress> queue = comments.getAddresses();
+ int lastInterpolatedRow = -1;
+ while (queue.size() > 0) {
+ CellAddress cellAddress = queue.peek();
+ if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) {
+ cellAddress = queue.remove();
+ if (cellAddress.getRow() != lastInterpolatedRow) {
+ startRow(cellAddress.getRow());
+ }
+ dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
+ lastInterpolatedRow = cellAddress.getRow();
+ } else {
+ break;
+ }
+ }
+
+ }
+
+ private void startRow(int row) {
+ if (row == lastStartedRow) {
+ return;
+ }
+
+ if (lastStartedRow != lastEndedRow) {
+ endRow(lastStartedRow);
+ }
+ handler.startRow(row);
+ lastStartedRow = row;
+ }
+
+ private void endRow(int row) {
+ if (lastEndedRow == row) {
+ return;
+ }
+ handler.endRow(row);
+ lastEndedRow = row;
+ }
+
+ private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) {
+ handler.cell(cellAddress.formatAsString(), null, comment);
+ }
+
+ private double rkNumber(byte[] data, int offset) {
+ //see 2.5.122 for this abomination
+ byte b0 = data[offset];
+ String s = Integer.toString(b0, 2);
+ boolean numDivBy100 = ((b0 & 1) == 1); // else as is
+ boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer
+
+ //unset highest 2 bits
+ b0 &= ~1;
+ b0 &= ~(1<<1);
+
+ rkBuffer[4] = b0;
+ for (int i = 1; i < 4; i++) {
+ rkBuffer[i+4] = data[offset+i];
+ }
+ double d = 0.0;
+ if (floatingPoint) {
+ d = LittleEndian.getDouble(rkBuffer);
+ } else {
+ d = LittleEndian.getInt(rkBuffer);
+ }
+ d = (numDivBy100) ? d/100 : d;
+ return d;
+ }
+
+ /**
+ * You need to implement this to handle the results
+ * of the sheet parsing.
+ */
+ public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler {
+ /**
+ * A cell, with the given formatted value (may be null),
+ * a url (may be null), a toolTip (may be null)
+ * and possibly a comment (may be null), was encountered */
+ void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment);
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import org.apache.poi.POIXMLException;
+import org.apache.poi.ss.usermodel.BuiltinFormats;
+import org.apache.poi.util.Internal;
+
+@Internal
+public class XSSFBStylesTable extends XSSFBParser {
+
+ private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>();
+ private final List<Short> styleIds = new ArrayList<Short>();
+
+ private boolean inCellXFS = false;
+ private boolean inFmts = false;
+ public XSSFBStylesTable(InputStream is) throws IOException {
+ super(is);
+ parse();
+ }
+
+ String getNumberFormatString(int idx) {
+ if (numberFormats.containsKey(styleIds.get((short)idx))) {
+ return numberFormats.get(styleIds.get((short)idx));
+ }
+
+ return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx));
+ }
+
+ @Override
+ public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+ XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
+ switch (type) {
+ case BrtBeginCellXFs:
+ inCellXFS = true;
+ break;
+ case BrtEndCellXFs:
+ inCellXFS = false;
+ break;
+ case BrtXf:
+ if (inCellXFS) {
+ handleBrtXFInCellXF(data);
+ }
+ break;
+ case BrtBeginFmts:
+ inFmts = true;
+ break;
+ case BrtEndFmts:
+ inFmts = false;
+ break;
+ case BrtFmt:
+ if (inFmts) {
+ handleFormat(data);
+ }
+ break;
+
+ }
+ }
+
+ private void handleFormat(byte[] data) {
+ int ifmt = data[0] & 0xFF;
+ if (ifmt > Short.MAX_VALUE) {
+ throw new POIXMLException("Format id must be a short");
+ }
+ StringBuilder sb = new StringBuilder();
+ XSSFBUtils.readXLWideString(data, 2, sb);
+ String fmt = sb.toString();
+ numberFormats.put((short)ifmt, fmt);
+ }
+
+ private void handleBrtXFInCellXF(byte[] data) {
+ int ifmtOffset = 2;
+ //int ifmtLength = 2;
+
+ //numFmtId in xml terms
+ int ifmt = data[ifmtOffset] & 0xFF;//the second byte is ignored
+ styleIds.add((short)ifmt);
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+
+import java.nio.charset.Charset;
+
+import org.apache.poi.POIXMLException;
+import org.apache.poi.util.Internal;
+import org.apache.poi.util.LittleEndian;
+
+@Internal
+public class XSSFBUtils {
+
+ /**
+ * Reads an XLNullableWideString.
+ * @param data data from which to read
+ * @param offset in data from which to start
+ * @param sb buffer to which to write. You must setLength(0) before calling!
+ * @return number of bytes read
+ * @throws XSSFBParseException if there was an exception during reading
+ */
+ static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
+ long numChars = LittleEndian.getUInt(data, offset);
+ if (numChars < 0) {
+ throw new XSSFBParseException("too few chars to read");
+ } else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!!
+ return 0;
+ } else if (numChars > 0xFFFFFFFFL) {
+ throw new XSSFBParseException("too many chars to read");
+ }
+
+ int numBytes = 2*(int)numChars;
+ offset += 4;
+ if (offset+numBytes > data.length) {
+ throw new XSSFBParseException("trying to read beyond data length:" +
+ "offset="+offset+", numBytes="+numBytes+", data.length="+data.length);
+ }
+ sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
+ numBytes+=4;
+ return numBytes;
+ }
+
+
+ /**
+ * Reads an XLNullableWideString.
+ * @param data data from which to read
+ * @param offset in data from which to start
+ * @param sb buffer to which to write. You must setLength(0) before calling!
+ * @return number of bytes read
+ * @throws XSSFBParseException if there was an exception while trying to read the string
+ */
+ public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
+ long numChars = LittleEndian.getUInt(data, offset);
+ if (numChars < 0) {
+ throw new XSSFBParseException("too few chars to read");
+ } else if (numChars > 0xFFFFFFFFL) {
+ throw new XSSFBParseException("too many chars to read");
+ }
+ int numBytes = 2*(int)numChars;
+ offset += 4;
+ if (offset+numBytes > data.length) {
+ throw new XSSFBParseException("trying to read beyond data length");
+ }
+ sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
+ numBytes+=4;
+ return numBytes;
+ }
+
+ static int castToInt(long val) {
+ if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) {
+ return (int)val;
+ }
+ throw new POIXMLException("val ("+val+") can't be cast to int");
+ }
+
+ static short castToShort(int val) {
+ if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) {
+ return (short)val;
+ }
+ throw new POIXMLException("val ("+val+") can't be cast to short");
+
+ }
+
+ //TODO: move to LittleEndian?
+ static int get24BitInt( byte[] data, int offset) {
+ int i = offset;
+ int b0 = data[i++] & 0xFF;
+ int b1 = data[i++] & 0xFF;
+ int b2 = data[i] & 0xFF;
+ return ( b2 << 16 ) + ( b1 << 8 ) + b0;
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.util.Internal;
+
+/**
+ * This is a read only record that maintains information about
+ * a hyperlink. In OOXML land, this information has to be merged
+ * from 1) the sheet's .rels to get the url and 2) from after the
+ * sheet data in they hyperlink section.
+ *
+ * The {@link #display} is often empty and should be filled from
+ * the contents of the anchor cell.
+ *
+ */
+@Internal
+public class XSSFHyperlinkRecord {
+
+ private final CellRangeAddress cellRangeAddress;
+ private final String relId;
+ private String location;
+ private String toolTip;
+ private String display;
+
+ XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
+ this.cellRangeAddress = cellRangeAddress;
+ this.relId = relId;
+ this.location = location;
+ this.toolTip = toolTip;
+ this.display = display;
+ }
+
+ void setLocation(String location) {
+ this.location = location;
+ }
+
+ void setToolTip(String toolTip) {
+ this.toolTip = toolTip;
+ }
+
+ void setDisplay(String display) {
+ this.display = display;
+ }
+
+ CellRangeAddress getCellRangeAddress() {
+ return cellRangeAddress;
+ }
+
+ public String getRelId() {
+ return relId;
+ }
+
+ public String getLocation() {
+ return location;
+ }
+
+ public String getToolTip() {
+ return toolTip;
+ }
+
+ public String getDisplay() {
+ return display;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ XSSFHyperlinkRecord that = (XSSFHyperlinkRecord) o;
+
+ if (cellRangeAddress != null ? !cellRangeAddress.equals(that.cellRangeAddress) : that.cellRangeAddress != null)
+ return false;
+ if (relId != null ? !relId.equals(that.relId) : that.relId != null) return false;
+ if (location != null ? !location.equals(that.location) : that.location != null) return false;
+ if (toolTip != null ? !toolTip.equals(that.toolTip) : that.toolTip != null) return false;
+ return display != null ? display.equals(that.display) : that.display == null;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = cellRangeAddress != null ? cellRangeAddress.hashCode() : 0;
+ result = 31 * result + (relId != null ? relId.hashCode() : 0);
+ result = 31 * result + (location != null ? location.hashCode() : 0);
+ result = 31 * result + (toolTip != null ? toolTip.hashCode() : 0);
+ result = 31 * result + (display != null ? display.hashCode() : 0);
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return "XSSFHyperlinkRecord{" +
+ "cellRangeAddress=" + cellRangeAddress +
+ ", relId='" + relId + '\'' +
+ ", location='" + location + '\'' +
+ ", toolTip='" + toolTip + '\'' +
+ ", display='" + display + '\'' +
+ '}';
+ }
+}
--- /dev/null
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<!--
+ ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ====================================================================
+-->
+<html>
+<head>
+</head>
+<body bgcolor="white">
+
+<p>The org.apache.poi.xssf.binary package includes necessary underlying components
+for streaming/read-only processing of xlsb files.
+</p>
+<p>
+ POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader
+ in o.a.p.xssf.eventusermodel.
+</p>
+<p>
+ This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes
+ have been marked @Internal and the API is subject to change.
+</p>
+<h2>Related Documentation</h2>
+
+For overviews, tutorials, examples, guides, and tool documentation, please see:
+<ul>
+<li><a href="http://poi.apache.org">Apache POI Project</a>
+</ul>
+
+</body>
+</html>
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBParseException;
+import org.apache.poi.xssf.binary.XSSFBParser;
+import org.apache.poi.xssf.binary.XSSFBRecordType;
+import org.apache.poi.xssf.binary.XSSFBRelation;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.binary.XSSFBUtils;
+import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+/**
+ * Reader for xlsb files.
+ */
+public class XSSFBReader extends XSSFReader {
+ /**
+ * Creates a new XSSFReader, for the given package
+ *
+ * @param pkg opc package
+ */
+ public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
+ super(pkg);
+ }
+
+ /**
+ * Returns an Iterator which will let you get at all the
+ * different Sheets in turn.
+ * Each sheet's InputStream is only opened when fetched
+ * from the Iterator. It's up to you to close the
+ * InputStreams when done with each one.
+ */
+ @Override
+ public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+ return new SheetIterator(workbookPart);
+ }
+
+ public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
+ ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
+ if(parts.size() == 0) return null;
+
+ // Create the Styles Table, and associate the Themes if present
+ return new XSSFBStylesTable(parts.get(0).getInputStream());
+
+ }
+
+
+ public static class SheetIterator extends XSSFReader.SheetIterator {
+
+ /**
+ * Construct a new SheetIterator
+ *
+ * @param wb package part holding workbook.xml
+ */
+ private SheetIterator(PackagePart wb) throws IOException {
+ super(wb);
+ }
+
+ Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+ SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream());
+ sheetRefLoader.parse();
+ return sheetRefLoader.getSheets().iterator();
+ }
+
+ /**
+ * Not supported by XSSFBReader's SheetIterator.
+ * Please use {@link #getXSSFBSheetComments()} instead.
+ * @return nothing, always throws IllegalArgumentException!
+ */
+ @Override
+ public CommentsTable getSheetComments() {
+ throw new IllegalArgumentException("Please use getXSSFBSheetComments");
+ }
+
+ public XSSFBCommentsTable getXSSFBSheetComments() {
+ PackagePart sheetPkg = getSheetPart();
+
+ // Do we have a comments relationship? (Only ever one if so)
+ try {
+ PackageRelationshipCollection commentsList =
+ sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+ if (commentsList.size() > 0) {
+ PackageRelationship comments = commentsList.getRelationship(0);
+ if (comments == null || comments.getTargetURI() == null) {
+ return null;
+ }
+ PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
+ PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
+ return new XSSFBCommentsTable(commentsPart.getInputStream());
+ }
+ } catch (InvalidFormatException e) {
+ return null;
+ } catch (IOException e) {
+ return null;
+ }
+ return null;
+ }
+
+ }
+
+ private static class SheetRefLoader extends XSSFBParser {
+ List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>();
+
+ private SheetRefLoader(InputStream is) {
+ super(is);
+ }
+
+ @Override
+ public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+ if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
+ addWorksheet(data);
+ }
+ }
+
+ private void addWorksheet(byte[] data) {
+ int offset = 0;
+ //this is the sheet state #2.5.142
+ long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+
+ long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+ //according to #2.4.304
+ if (iTabID < 1 || iTabID > 0x0000FFFFL) {
+ throw new XSSFBParseException("table id out of range: "+iTabID);
+ }
+ StringBuilder sb = new StringBuilder();
+ offset += XSSFBUtils.readXLWideString(data, offset, sb);
+ String relId = sb.toString();
+ sb.setLength(0);
+ XSSFBUtils.readXLWideString(data, offset, sb);
+ String name = sb.toString();
+ if (relId != null && relId.trim().length() > 0) {
+ sheets.add(new XSSFSheetRef(relId, name));
+ }
+ }
+
+ List<XSSFSheetRef> getSheets() {
+ return sheets;
+ }
+ }
+}
\ No newline at end of file
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
+import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
+import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
/**
* This class makes it easy to get at individual parts
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
- private OPCPackage pkg;
- private PackagePart workbookPart;
+ protected OPCPackage pkg;
+ protected PackagePart workbookPart;
/**
* Creates a new XSSFReader, for the given package
private final Map<String, PackagePart> sheetMap;
/**
- * Current CTSheet bean
+ * Current sheet reference
*/
- private CTSheet ctSheet;
-
+ XSSFSheetRef xssfSheetRef;
+
/**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
* i.e. as they are stored in the underlying package
*/
- private final Iterator<CTSheet> sheetIterator;
+ final Iterator<XSSFSheetRef> sheetIterator;
/**
* Construct a new SheetIterator
*
* @param wb package part holding workbook.xml
*/
- private SheetIterator(PackagePart wb) throws IOException {
+ SheetIterator(PackagePart wb) throws IOException {
/**
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
sheetMap.put(rel.getId(), pkg.getPart(relName));
}
}
- //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
- //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
- CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
- List<CTSheet> validSheets = new ArrayList<CTSheet>();
- for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
- //if there's no relationship id, silently skip the sheet
- String sheetId = ctSheet.getId();
- if (sheetId != null && sheetId.length() > 0) {
- validSheets.add(ctSheet);
- }
- }
- sheetIterator = validSheets.iterator();
+ //step 2. Read array of CTSheet elements, wrap it in a LinkedList
+ //and construct an iterator
+ sheetIterator = createSheetIteratorFromWB(wb);
} catch (InvalidFormatException e){
throw new POIXMLException(e);
- } catch (XmlException e){
+ }
+ }
+
+ Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+
+ XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+ XMLReader xmlReader = null;
+ try {
+ xmlReader = SAXHelper.newXMLReader();
+ } catch (ParserConfigurationException e) {
+ throw new POIXMLException(e);
+ } catch (SAXException e) {
throw new POIXMLException(e);
}
+ xmlReader.setContentHandler(xmlSheetRefReader);
+ try {
+ xmlReader.parse(new InputSource(wb.getInputStream()));
+ } catch (SAXException e) {
+ throw new POIXMLException(e);
+ }
+
+ List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
+ for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
+ //if there's no relationship id, silently skip the sheet
+ String sheetId = xssfSheetRef.getId();
+ if (sheetId != null && sheetId.length() > 0) {
+ validSheets.add(xssfSheetRef);
+ }
+ }
+ return validSheets.iterator();
}
+
/**
* Returns <tt>true</tt> if the iteration has more elements.
*
*/
@Override
public InputStream next() {
- ctSheet = sheetIterator.next();
+ xssfSheetRef = sheetIterator.next();
- String sheetId = ctSheet.getId();
+ String sheetId = xssfSheetRef.getId();
try {
PackagePart sheetPkg = sheetMap.get(sheetId);
return sheetPkg.getInputStream();
* @return name of the current sheet
*/
public String getSheetName() {
- return ctSheet.getName();
+ return xssfSheetRef.getName();
}
/**
}
public PackagePart getSheetPart() {
- String sheetId = ctSheet.getId();
+ String sheetId = xssfSheetRef.getId();
return sheetMap.get(sheetId);
}
throw new IllegalStateException("Not supported");
}
}
+
+ protected final static class XSSFSheetRef {
+ //do we need to store sheetId, too?
+ private final String id;
+ private final String name;
+
+ public XSSFSheetRef(String id, String name) {
+ this.id = id;
+ this.name = name;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ //scrapes sheet reference info and order from workbook.xml
+ private static class XMLSheetRefReader extends DefaultHandler {
+ private final static String SHEET = "sheet";
+ private final static String ID = "id";
+ private final static String NAME = "name";
+
+ private final List<XSSFSheetRef> sheetRefs = new LinkedList();
+
+ @Override
+ public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
+ if (localName.toLowerCase(Locale.US).equals(SHEET)) {
+ String name = null;
+ String id = null;
+ for (int i = 0; i < attrs.getLength(); i++) {
+ if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
+ name = attrs.getValue(i);
+ } else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
+ id = attrs.getValue(i);
+ }
+ sheetRefs.add(new XSSFSheetRef(id, name));
+ }
+ }
+ }
+
+ List<XSSFSheetRef> getSheetRefs() {
+ return Collections.unmodifiableList(sheetRefs);
+ }
+ }
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+/**
+ * Implementation of a text extractor or xlsb Excel
+ * files that uses SAX-like binary parsing.
+ */
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
+ implements org.apache.poi.ss.extractor.ExcelExtractor {
+
+ public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+ XSSFRelation.XLSB_BINARY_WORKBOOK
+ };
+
+ private boolean handleHyperlinksInCells = false;
+
+ public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+ super(path);
+ }
+
+ public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+ super(container);
+ }
+
+ public static void main(String[] args) throws Exception {
+ if (args.length < 1) {
+ System.err.println("Use:");
+ System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
+ System.exit(1);
+ }
+ POIXMLTextExtractor extractor =
+ new XSSFBEventBasedExcelExtractor(args[0]);
+ System.out.println(extractor.getText());
+ extractor.close();
+ }
+
+ public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
+ this.handleHyperlinksInCells = handleHyperlinksInCells;
+ }
+
+ /**
+ * Should we return the formula itself, and not
+ * the result it produces? Default is false
+ * This is currently unsupported for xssfb
+ */
+ @Override
+ public void setFormulasNotResults(boolean formulasNotResults) {
+ throw new IllegalArgumentException("Not currently supported");
+ }
+
+ /**
+ * Processes the given sheet
+ */
+ public void processSheet(
+ SheetContentsHandler sheetContentsExtractor,
+ XSSFBStylesTable styles,
+ XSSFBCommentsTable comments,
+ XSSFBSharedStringsTable strings,
+ InputStream sheetInputStream)
+ throws IOException, SAXException {
+
+ DataFormatter formatter;
+ if (locale == null) {
+ formatter = new DataFormatter();
+ } else {
+ formatter = new DataFormatter(locale);
+ }
+
+ XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
+ sheetInputStream,
+ styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
+ );
+ xssfbSheetHandler.parse();
+ }
+
+ /**
+ * Processes the file and returns the text
+ */
+ public String getText() {
+ try {
+ XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
+ XSSFBReader xssfbReader = new XSSFBReader(container);
+ XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
+ XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
+
+ StringBuffer text = new StringBuffer();
+ SheetTextExtractor sheetExtractor = new SheetTextExtractor();
+ XSSFBHyperlinksTable hyperlinksTable = null;
+ while (iter.hasNext()) {
+ InputStream stream = iter.next();
+ if (includeSheetNames) {
+ text.append(iter.getSheetName());
+ text.append('\n');
+ }
+ if (handleHyperlinksInCells) {
+ hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
+ }
+ XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
+ processSheet(sheetExtractor, styles, comments, strings, stream);
+ if (includeHeadersFooters) {
+ sheetExtractor.appendHeaderText(text);
+ }
+ sheetExtractor.appendCellText(text);
+ if (includeTextBoxes) {
+ processShapes(iter.getShapes(), text);
+ }
+ if (includeHeadersFooters) {
+ sheetExtractor.appendFooterText(text);
+ }
+ sheetExtractor.reset();
+ stream.close();
+ }
+
+ return text.toString();
+ } catch (IOException e) {
+ System.err.println(e);
+ return null;
+ } catch (SAXException se) {
+ System.err.println(se);
+ return null;
+ } catch (OpenXML4JException o4je) {
+ System.err.println(o4je);
+ return null;
+ }
+ }
+
+}
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
- private OPCPackage container;
+ OPCPackage container;
private POIXMLProperties properties;
- private Locale locale;
- private boolean includeTextBoxes = true;
- private boolean includeSheetNames = true;
- private boolean includeCellComments = false;
- private boolean includeHeadersFooters = true;
- private boolean formulasNotResults = false;
+ Locale locale;
+ boolean includeTextBoxes = true;
+ boolean includeSheetNames = true;
+ boolean includeCellComments = false;
+ boolean includeHeadersFooters = true;
+ boolean formulasNotResults = false;
private boolean concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
}
}
- private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
+ void processShapes(List<XSSFShape> shapes, StringBuffer text) {
if (shapes == null){
return;
}
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
- private void appendHeaderText(StringBuffer buffer) {
+ void appendHeaderText(StringBuffer buffer) {
appendHeaderFooterText(buffer, "firstHeader");
appendHeaderFooterText(buffer, "oddHeader");
appendHeaderFooterText(buffer, "evenHeader");
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
- private void appendFooterText(StringBuffer buffer) {
+ void appendFooterText(StringBuffer buffer) {
// append the text for each footer type in the same order
// they are appended in XSSFExcelExtractor
appendHeaderFooterText(buffer, "firstFooter");
/**
* Append the cell contents we have collected.
*/
- private void appendCellText(StringBuffer buffer) {
+ void appendCellText(StringBuffer buffer) {
checkMaxTextSize(buffer, output.toString());
buffer.append(output);
}
/**
* Reset this <code>SheetTextExtractor</code> for the next sheet.
*/
- private void reset() {
+ void reset() {
output.setLength(0);
firstCellOfRow = true;
if (headerFooterMap != null) {
private static File xlsxStrict;
private static File xltx;
private static File xlsEmb;
+ private static File xlsb;
private static File doc;
private static File doc6;
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
xltx = getFileAndCheck(ssTests, "test.xltx");
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+ xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
doc = getFileAndCheck(wpTests, "SampleDoc.doc");
);
extractor.close();
+ extractor = ExtractorFactory.createExtractor(xlsb);
+ assertTrue(
+ extractor.getText().contains("test")
+ );
+ extractor.close();
+
+
extractor = ExtractorFactory.createExtractor(xltx);
assertTrue(
extractor.getText().contains("test")
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.junit.Test;
+
+public class TestXSSFBSharedStringsTable {
+
+
+ private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+ @Test
+ public void testBasic() throws Exception {
+
+ OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
+ List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
+ assertEquals(1, parts.size());
+
+ XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
+ List<String> strings = rtbl.getItems();
+ assertEquals(49, strings.size());
+
+ assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
+ assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
+ assertEquals(55, rtbl.getCount());
+ assertEquals(49, rtbl.getUniqueCount());
+
+ //TODO: add in tests for phonetic runs
+
+ }
+
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.junit.Test;
+
+public class TestXSSFBSheetHyperlinkManager {
+
+ private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+ @Test
+ public void testBasic() throws Exception {
+
+ OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
+ XSSFBReader reader = new XSSFBReader(pkg);
+ XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
+ it.next();
+ XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
+ List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
+ assertNotNull(records);
+ assertEquals(1, records.size());
+ XSSFHyperlinkRecord record = records.get(0);
+ assertEquals("http://tika.apache.org/", record.getLocation());
+ assertEquals("rId2", record.getRelId());
+
+ }
+
+
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.eventusermodel;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.junit.Test;
+
+public class TestXSSFBReader {
+
+ private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+ @Test
+ public void testBasic() throws Exception {
+ List<String> sheetTexts = getSheets("testVarious.xlsb");
+
+ assertEquals(1, sheetTexts.size());
+ String xsxml = sheetTexts.get(0);
+ assertContains("This is a string", xsxml);
+ assertContains("<td ref=\"B2\">13</td>", xsxml);
+ assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
+ assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml);
+ assertContains("<td ref=\"B5\">20%</td>", xsxml);
+ assertContains("<td ref=\"B6\">13.12</td>", xsxml);
+ assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
+ assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);
+
+ assertContains("46/1963", xsxml);//custom format 1
+ assertContains("3/128", xsxml);//custom format 2
+
+ assertContains("<tr num=\"7>\n" +
+ "\t<td ref=\"A8\">longer int</td>\n" +
+ "\t<td ref=\"B8\">1.23457E+15</td>\n" +
+ "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+ "test comment2</span></td>\n" +
+ "</tr num=\"7>", xsxml);
+
+ assertContains("<tr num=\"34>\n" +
+ "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+ "comment6 actually in cell</span></td>\n" +
+ "</tr num=\"34>", xsxml);
+
+ assertContains("<tr num=\"64>\n" +
+ "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+ "comment7 end of file</span></td>\n" +
+ "</tr num=\"64>", xsxml);
+
+ assertContains("<tr num=\"65>\n" +
+ "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+ "comment8 end of file</span></td>\n" +
+ "</tr num=\"65>", xsxml);
+
+ assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
+ assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
+ assertContains(
+ "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
+ xsxml);
+ assertContains(
+ "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
+ xsxml);
+ assertContains(
+ "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
+ xsxml);
+ assertContains(
+ "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
+ xsxml);
+
+ }
+
+ @Test
+ public void testComments() throws Exception {
+ List<String> sheetTexts = getSheets("comments.xlsb");
+ String xsxml = sheetTexts.get(0);
+ assertContains(
+ "<tr num=\"0>\n" +
+ "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
+ "\t<td ref=\"B1\">row1</td>\n" +
+ "</tr num=\"0>", xsxml);
+ assertContains(
+ "<tr num=\"1>\n" +
+ "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+ "comment row2 (index1)</span></td>\n" +
+ "</tr num=\"1>",
+ xsxml);
+ assertContains("<tr num=\"2>\n" +
+ "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
+ "\t<td ref=\"B3\">row3</td>\n", xsxml);
+
+ assertContains("<tr num=\"3>\n" +
+ "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
+ "\t<td ref=\"B4\">row4</td>\n" +
+ "</tr num=\"3></sheet>", xsxml);
+
+ }
+
+ private List<String> getSheets(String testFileName) throws Exception {
+ OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
+ List<String> sheetTexts = new ArrayList<String>();
+ XSSFBReader r = new XSSFBReader(pkg);
+
+// assertNotNull(r.getWorkbookData());
+ // assertNotNull(r.getSharedStringsData());
+ assertNotNull(r.getXSSFBStylesTable());
+ XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
+ XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
+ XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData();
+
+ while (it.hasNext()) {
+ InputStream is = it.next();
+ String name = it.getSheetName();
+ TestSheetHandler testSheetHandler = new TestSheetHandler();
+ testSheetHandler.startSheet(name);
+ XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
+ xssfbStylesTable,
+ it.getXSSFBSheetComments(),
+ sst, testSheetHandler,
+ new DataFormatter(),
+ false);
+ sheetHandler.parse();
+ testSheetHandler.endSheet();
+ sheetTexts.add(testSheetHandler.toString());
+ }
+ return sheetTexts;
+
+ }
+
+ //This converts all [\r\n\t]+ to " "
+ private void assertContains(String needle, String haystack) {
+ needle = needle.replaceAll("[\r\n\t]+", " ");
+ haystack = haystack.replaceAll("[\r\n\t]+", " ");
+ if (haystack.indexOf(needle) < 0) {
+ fail("couldn't find >"+needle+"< in: "+haystack );
+ }
+ }
+
+
+ @Test
+ public void testDate() throws Exception {
+ List<String> sheets = getSheets("date.xlsb");
+ assertEquals(1, sheets.size());
+ assertContains("1/12/13", sheets.get(0));
+
+ }
+
+
+ private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
+ private final StringBuilder sb = new StringBuilder();
+
+ public void startSheet(String sheetName) {
+ sb.append("<sheet name=\"").append(sheetName).append(">");
+ }
+
+ public void endSheet(){
+ sb.append("</sheet>");
+ }
+ @Override
+ public void startRow(int rowNum) {
+ sb.append("\n<tr num=\"").append(rowNum).append(">");
+ }
+
+ @Override
+ public void endRow(int rowNum) {
+ sb.append("\n</tr num=\"").append(rowNum).append(">");
+ }
+
+ @Override
+ public void cell(String cellReference, String formattedValue, XSSFComment comment) {
+ formattedValue = (formattedValue == null) ? "" : formattedValue;
+ if (comment == null) {
+ sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
+ } else {
+ sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
+ .append(formattedValue)
+ .append("<span type=\"comment\" author=\"")
+ .append(comment.getAuthor()).append("\">")
+ .append(comment.getString().toString().trim()).append("</span>")
+ .append("</td>");
+ }
+ }
+
+ @Override
+ public void headerFooter(String text, boolean isHeader, String tagName) {
+ if (isHeader) {
+ sb.append("<header tagName=\""+tagName+"\">"+text+"</header>");
+ } else {
+ sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>");
+
+ }
+ }
+
+ @Override
+ public String toString() {
+ return sb.toString();
+ }
+ }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.junit.Test;
+
+/**
+ * Tests for {@link XSSFBEventBasedExcelExtractor}
+ */
+public class TestXSSFBEventBasedExcelExtractor {
+
+
+ protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+ return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
+ openSamplePackage(sampleName));
+ }
+
+ /**
+ * Get text out of the simple file
+ */
+ @Test
+ public void testGetSimpleText() throws Exception {
+ // a very simple file
+ XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
+ extractor.setIncludeCellComments(true);
+ extractor.getText();
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check sheet names
+ assertTrue(text.startsWith("Sheet1"));
+ assertTrue(text.endsWith("Sheet3\n"));
+
+ // Now without, will have text
+ extractor.setIncludeSheetNames(false);
+ text = extractor.getText();
+ String CHUNK1 =
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n";
+ String CHUNK2 =
+ "The quick brown fox jumps over the lazy dog\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n";
+ assertEquals(
+ CHUNK1 +
+ "at\t4995\n" +
+ CHUNK2
+ , text);
+
+ }
+
+
+ /**
+ * Test text extraction from text box using getShapes()
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testShapes() throws Exception {
+ XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
+
+ try {
+ String text = ooxmlExtractor.getText();
+
+ assertTrue(text.indexOf("Line 1") > -1);
+ assertTrue(text.indexOf("Line 2") > -1);
+ assertTrue(text.indexOf("Line 3") > -1);
+ } finally {
+ ooxmlExtractor.close();
+ }
+ }
+
+}