From: Tim Allison Date: Thu, 16 Mar 2017 18:37:13 +0000 (+0000) Subject: 60826 -- add initial support for streaming reading of xlsb files. X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=730f394261c5ea8d7ec0f455f018d909fd2e71dd;p=poi.git 60826 -- add initial support for streaming reading of xlsb files. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 7533a27426..faae5bacbd 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -56,6 +56,7 @@ import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xslf.usermodel.XSLFSlideShow; +import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xssf.usermodel.XSSFRelation; @@ -244,6 +245,13 @@ public class ExtractorFactory { return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); } + // How about xlsb? 
+ for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { + if (rel.getContentType().equals(contentType)) { + return new XSSFBEventBasedExcelExtractor(pkg); + } + } + throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); } catch (IOException e) { diff --git a/src/ooxml/java/org/apache/poi/xssf/XLSBUnsupportedException.java b/src/ooxml/java/org/apache/poi/xssf/XLSBUnsupportedException.java index 63260276f8..c6ebcff542 100644 --- a/src/ooxml/java/org/apache/poi/xssf/XLSBUnsupportedException.java +++ b/src/ooxml/java/org/apache/poi/xssf/XLSBUnsupportedException.java @@ -19,7 +19,9 @@ package org.apache.poi.xssf; import org.apache.poi.UnsupportedFileFormatException; /** - * We don't support .xlsb files, sorry + * We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}. + * As of POI 3.15-beta3, we do support streaming reading of xlsb files + * via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader} */ public class XLSBUnsupportedException extends UnsupportedFileFormatException { private static final long serialVersionUID = 7849681804154571175L; diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java new file mode 100644 index 0000000000..5b427ae815 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellHeader.java @@ -0,0 +1,71 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.ss.util.CellReference; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; + +/** + * This class encapsulates what the spec calls a "Cell" object. + * I added "Header" to clarify that this does not contain the contents + * of the cell, only the column number, the style id and the phonetic boolean + */ +@Internal +class XSSFBCellHeader { + public static int length = 8; + + /** + * + * @param data raw data + * @param offset offset at which to start reading the record + * @param currentRow 0-based current row count + * @param cell cell buffer to update + */ + public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) { + long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; + int styleIdx = XSSFBUtils.get24BitInt(data, offset); offset += 3; + //TODO: range checking + boolean showPhonetic = false;//TODO: fill this out + cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic); + } + + private int rowNum; + private int colNum; + private int styleIdx; + private boolean showPhonetic; + + public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) { + this.rowNum = rowNum; + this.colNum = colNum; + this.styleIdx = styleIdx; + this.showPhonetic = showPhonetic; + } + + int getColNum() { + return colNum; + } + + String formatAddressAsString() { + return CellReference.convertNumToColString(colNum)+(rowNum+1); + } + + int getStyleIdx() { + 
return styleIdx; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java new file mode 100644 index 0000000000..3e2e79d8d1 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCellRange.java @@ -0,0 +1,54 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + + +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; + +@Internal +class XSSFBCellRange { + + public final static int length = 4* LittleEndian.INT_SIZE; + /** + * Parses an RfX cell range from the data starting at the offset. + * This performs no range checking. + * @param data raw bytes + * @param offset offset at which to start reading from data + * @param cellRange to overwrite. If null, a new cellRange will be created. + * @return a mutable cell range. 
+ */ + public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) { + if (cellRange == null) { + cellRange = new XSSFBCellRange(); + } + cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; + cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; + cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; + cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); + + return cellRange; + } + + int firstRow; + int lastRow; + int firstCol; + int lastCol; + + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java new file mode 100644 index 0000000000..ae7c1c56ed --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java @@ -0,0 +1,112 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + + +import org.apache.poi.ss.usermodel.ClientAnchor; +import org.apache.poi.ss.usermodel.RichTextString; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.util.Internal; +import org.apache.poi.xssf.usermodel.XSSFComment; + +@Internal +class XSSFBComment extends XSSFComment { + + private final CellAddress cellAddress; + private final String author; + private final XSSFBRichTextString comment; + private boolean visible = true; + + XSSFBComment(CellAddress cellAddress, String author, String comment) { + super(null, null, null); + this.cellAddress = cellAddress; + this.author = author; + this.comment = new XSSFBRichTextString(comment); + } + + @Override + public void setVisible(boolean visible) { + throw new IllegalArgumentException("XSSFBComment is read only."); + } + + @Override + public boolean isVisible() { + return visible; + } + + @Override + public CellAddress getAddress() { + return cellAddress; + } + + @Override + public void setAddress(CellAddress addr) { + throw new IllegalArgumentException("XSSFBComment is read only"); + } + + @Override + public void setAddress(int row, int col) { + throw new IllegalArgumentException("XSSFBComment is read only"); + + } + + @Override + public int getRow() { + return cellAddress.getRow(); + } + + @Override + public void setRow(int row) { + throw new IllegalArgumentException("XSSFBComment is read only"); + } + + @Override + public int getColumn() { + return cellAddress.getColumn(); + } + + @Override + public void setColumn(int col) { + throw new IllegalArgumentException("XSSFBComment is read only"); + } + + @Override + public String getAuthor() { + return author; + } + + @Override + public void setAuthor(String author) { + throw new IllegalArgumentException("XSSFBComment is read only"); + } + + @Override + public XSSFBRichTextString getString() { + return comment; + } + + @Override + public void 
setString(RichTextString string) { + throw new IllegalArgumentException("XSSFBComment is read only"); + } + + @Override + public ClientAnchor getClientAnchor() { + return null; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java new file mode 100644 index 0000000000..642eaf99b8 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBCommentsTable.java @@ -0,0 +1,113 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.TreeMap; + +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; + +@Internal +public class XSSFBCommentsTable extends XSSFBParser { + + private Map comments = new TreeMap(new CellAddressComparator());//key is the CellAddress of the commented cell + private Queue commentAddresses = new LinkedList(); + private List authors = new ArrayList(); + + //these are all used only during parsing, and they are mutable! + private int authorId = -1; + private CellAddress cellAddress = null; + private XSSFBCellRange cellRange = null; + private String comment = null; + private StringBuilder authorBuffer = new StringBuilder(); + + + public XSSFBCommentsTable(InputStream is) throws IOException { + super(is); + parse(); + commentAddresses.addAll(comments.keySet()); + } + + @Override + public void handleRecord(int id, byte[] data) throws XSSFBParseException { + XSSFBRecordType recordType = XSSFBRecordType.lookup(id); + switch (recordType) { + case BrtBeginComment: + int offset = 0; + authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data)); offset += LittleEndian.INT_SIZE; + cellRange = XSSFBCellRange.parse(data, offset, cellRange); + offset+= XSSFBCellRange.length; + //for strict parsing; confirm that firstRow==lastRow and firstCol==lastCol (2.4.28) + cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol); + break; + case BrtCommentText: + XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0); + comment = xssfbRichStr.getString(); + break; + case BrtEndComment: + comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment)); + authorId = 
-1; + cellAddress = null; + break; + case BrtCommentAuthor: + authorBuffer.setLength(0); + XSSFBUtils.readXLWideString(data, 0, authorBuffer); + authors.add(authorBuffer.toString()); + break; + } + } + + + public Queue getAddresses() { + return commentAddresses; + } + + public XSSFBComment get(CellAddress cellAddress) { + if (cellAddress == null) { + return null; + } + return comments.get(cellAddress); + } + + private final static class CellAddressComparator implements Comparator { + + @Override + public int compare(CellAddress o1, CellAddress o2) { + if (o1.getRow() < o2.getRow()) { + return -1; + } else if (o1.getRow() > o2.getRow()) { + return 1; + } + if (o1.getColumn() < o2.getColumn()) { + return -1; + } else if (o1.getColumn() > o2.getColumn()) { + return 1; + } + return 0; + } + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java new file mode 100644 index 0000000000..1f43e35dce --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooter.java @@ -0,0 +1,75 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.util.Internal; +import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper; + +@Internal +class XSSFBHeaderFooter { + private final String headerFooterTypeLabel; + private final boolean isHeader; + private String rawString; + private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper(); + + + XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) { + this.headerFooterTypeLabel = headerFooterTypeLabel; + this.isHeader = isHeader; + } + + String getHeaderFooterTypeLabel() { + return headerFooterTypeLabel; + } + + String getRawString() { + return rawString; + } + + String getString() { + StringBuilder sb = new StringBuilder(); + String left = headerFooterHelper.getLeftSection(rawString); + String center = headerFooterHelper.getCenterSection(rawString); + String right = headerFooterHelper.getRightSection(rawString); + if (left != null && left.length() > 0) { + sb.append(left); + } + if (center != null && center.length() > 0) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(center); + } + if (right != null && right.length() > 0) { + if (sb.length() > 0) { + sb.append(" "); + } + sb.append(right); + } + return sb.toString(); + } + + void setRawString(String rawString) { + this.rawString = rawString; + } + + boolean isHeader() { + return isHeader; + } + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java new file mode 100644 index 0000000000..c70b7843e3 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHeaderFooters.java @@ -0,0 +1,87 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xssf.binary; + +import org.apache.poi.util.Internal; + +@Internal +class XSSFBHeaderFooters { + + public static XSSFBHeaderFooters parse(byte[] data) { + boolean diffOddEven = false; + boolean diffFirst = false; + boolean scaleWDoc = false; + boolean alignMargins = false; + + int offset = 2; + XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters(); + xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true); + xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false); + xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true); + xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false); + xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true); + xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false); + offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header); + offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer); + offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven); + offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven); + offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst); + readHeaderFooter(data, offset, 
xssfbHeaderFooter.footerFirst); + return xssfbHeaderFooter; + } + + private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) { + if (offset + 4 >= data.length) { + return 0; + } + StringBuilder sb = new StringBuilder(); + int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb); + headerFooter.setRawString(sb.toString()); + return bytesRead; + } + + private XSSFBHeaderFooter header; + private XSSFBHeaderFooter footer; + private XSSFBHeaderFooter headerEven; + private XSSFBHeaderFooter footerEven; + private XSSFBHeaderFooter headerFirst; + private XSSFBHeaderFooter footerFirst; + + public XSSFBHeaderFooter getHeader() { + return header; + } + + public XSSFBHeaderFooter getFooter() { + return footer; + } + + public XSSFBHeaderFooter getHeaderEven() { + return headerEven; + } + + public XSSFBHeaderFooter getFooterEven() { + return footerEven; + } + + public XSSFBHeaderFooter getHeaderFirst() { + return headerFirst; + } + + public XSSFBHeaderFooter getFooterFirst() { + return footerFirst; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java new file mode 100644 index 0000000000..28c020c57b --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java @@ -0,0 +1,181 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.BitSet; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.ss.util.CellRangeUtil; +import org.apache.poi.util.Internal; +import org.apache.poi.xssf.usermodel.XSSFRelation; + +@Internal +public class XSSFBHyperlinksTable { + + private final static BitSet RECORDS = new BitSet(); + + + static { + RECORDS.set(XSSFBRecordType.BrtHLink.getId()); + } + + + private final List hyperlinkRecords = new ArrayList(); + + //cache the relId to hyperlink url from the sheet's .rels + private Map relIdToHyperlink = new HashMap(); + + public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException { + //load the urls from the sheet .rels + loadUrlsFromSheetRels(sheetPart); + //now load the hyperlinks from the bottom of the sheet + HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetPart.getInputStream()); + scraper.parse(); + } + + /** + * + * @return a map of the hyperlinks. 
The key is the top left cell address in their CellRange + */ + public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() { + Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap = + new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator()); + for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) { + CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(), + hyperlinkRecord.getCellRangeAddress().getFirstColumn()); + List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress); + if (list == null) { + list = new ArrayList<XSSFHyperlinkRecord>(); + } + list.add(hyperlinkRecord); + hyperlinkMap.put(cellAddress, list); + } + return hyperlinkMap; + } + + + /** + * + * @param cellAddress cell address to find + * @return null if not a hyperlink + */ + public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) { + List<XSSFHyperlinkRecord> overlapping = null; + CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(), + cellAddress.getRow(), + cellAddress.getColumn(), + cellAddress.getColumn()); + for (XSSFHyperlinkRecord record : hyperlinkRecords) { + if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) { + if (overlapping == null) { + overlapping = new ArrayList<XSSFHyperlinkRecord>(); + } + overlapping.add(record); + } + } + return overlapping; + } + + private void loadUrlsFromSheetRels(PackagePart sheetPart) { + try { + for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) { + relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString()); + } + } catch (InvalidFormatException e) { + //swallow + } + } + + private class HyperlinkSheetScraper extends XSSFBParser { + + private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange(); + private final StringBuilder xlWideStringBuffer = new StringBuilder(); + + HyperlinkSheetScraper(InputStream is) { + super(is, RECORDS); + } + + @Override + public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { + if (recordType != XSSFBRecordType.BrtHLink.getId()) { + return; + } 
+ int offset = 0; + String relId = ""; + String location = ""; + String toolTip = ""; + String display = ""; + + hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange); + offset += XSSFBCellRange.length; + xlWideStringBuffer.setLength(0); + offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer); + relId = xlWideStringBuffer.toString(); + xlWideStringBuffer.setLength(0); + offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); + location = xlWideStringBuffer.toString(); + xlWideStringBuffer.setLength(0); + offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); + toolTip = xlWideStringBuffer.toString(); + xlWideStringBuffer.setLength(0); + offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); + display = xlWideStringBuffer.toString(); + CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol); + + String url = relIdToHyperlink.get(relId); + if (location == null || location.length() == 0) { + location = url; + } + + hyperlinkRecords.add( + new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display) + ); + } + } + + private static class TopLeftCellAddressComparator implements Comparator { + + @Override + public int compare(CellAddress o1, CellAddress o2) { + if (o1.getRow() < o2.getRow()) { + return -1; + } else if (o1.getRow() > o2.getRow()) { + return 1; + } + if (o1.getColumn() < o2.getColumn()) { + return -1; + } else if (o1.getColumn() > o2.getColumn()) { + return 1; + } + return 0; + } + } + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java new file mode 100644 index 0000000000..69ba7f041c --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParseException.java @@ -0,0 +1,28 @@ +/* 
==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +/** + * Parse exception while reading an xssfb + */ +public class XSSFBParseException extends RuntimeException { + + public XSSFBParseException(String msg) { + super(msg); + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java new file mode 100644 index 0000000000..cace843160 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java @@ -0,0 +1,105 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import java.io.IOException; +import java.io.InputStream; +import java.util.BitSet; + +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndianInputStream; + +/** + * Experimental parser for Microsoft's ooxml xssfb format. + * Not thread safe, obviously. Need to create a new one + * for each thread. + */ +@Internal +public abstract class XSSFBParser { + + private final LittleEndianInputStream is; + private final BitSet records; + + public XSSFBParser(InputStream is) { + this.is = new LittleEndianInputStream(is); + records = null; + } + + XSSFBParser(InputStream is, BitSet bitSet) { + this.is = new LittleEndianInputStream(is); + records = bitSet; + } + + public void parse() throws IOException { + + while (true) { + int bInt = is.read(); + if (bInt == -1) { + return; + } + readNext((byte) bInt); + } + } + + private void readNext(byte b1) throws IOException { + int recordId = 0; + + //if highest bit == 1 + if ((b1 >> 7 & 1) == 1) { + byte b2 = is.readByte(); + b1 &= ~(1<<7); //unset highest bit + b2 &= ~(1<<7); //unset highest bit (if it exists?) + recordId = (128*(int)b2)+(int)b1; + } else { + recordId = (int)b1; + } + + long recordLength = 0; + int i = 0; + boolean halt = false; + while (i < 4 && ! 
halt) { + byte b = is.readByte(); + halt = (b >> 7 & 1) == 0; //highest bit unset means this is the last length byte + b &= ~(1<<7); + recordLength += (int)b << (i*7); //multiply by 128^i + i++; + + } + if (records == null || records.get(recordId)) { + //add sanity check for length? + byte[] buff = new byte[(int) recordLength]; + is.readFully(buff); + handleRecord(recordId, buff); + } else { + long length = is.skip(recordLength); + if (length != recordLength) { + throw new XSSFBParseException("End of file reached before expected.\t"+ + "Tried to skip "+recordLength + ", but only skipped "+length); + } + } + } + + //It hurts, hurts, hurts to create a new byte array for every record. + //However, on a large Excel spreadsheet, this parser was 1/3 faster than + //the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf). + //The code is far cleaner to have the parser read all + //of the data rather than having every component promise that it read + //the correct amount. + abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException; + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java new file mode 100644 index 0000000000..65663f7fd5 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRecordType.java @@ -0,0 +1,92 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.util.Internal; + +@Internal +public enum XSSFBRecordType { + + BrtCellBlank(1), + BrtCellRk(2), + BrtCellError(3), + BrtCellBool(4), + BrtCellReal(5), + BrtCellSt(6), + BrtCellIsst(7), + BrtFmlaString(8), + BrtFmlaNum(9), + BrtFmlaBool(10), + BrtFmlaError(11), + BrtRowHdr(0), + BrtCellRString(62), + BrtBeginSheet(129), + BrtWsProp(147), + BrtWsDim(148), + BrtColInfo(60), + BrtBeginSheetData(145), + BrtEndSheetData(146), + BrtHLink(494), + BrtBeginHeaderFooter(479), + + //comments + BrtBeginCommentAuthors(630), + BrtEndCommentAuthors(631), + BrtCommentAuthor(632), + BrtBeginComment(635), + BrtCommentText(637), + BrtEndComment(636), + //styles table + BrtXf(47), + BrtFmt(44), + BrtBeginFmts(615), + BrtEndFmts(616), + BrtBeginCellXFs(617), + BrtEndCellXFs(618), + BrtBeginCellStyleXFS(626), + BrtEndCellStyleXFS(627), + + //stored strings table + BrtSstItem(19), //stored strings items + BrtBeginSst(159), //stored strings begin sst + BrtEndSst(160), //stored strings end sst + + BrtBundleSh(156), //defines worksheet in wb part + Unimplemented(-1); + + + private final int id; + + XSSFBRecordType(int id) { + this.id = id; + } + + public int getId() { + return id; + } + + public static XSSFBRecordType lookup(int id) { + for (XSSFBRecordType r : XSSFBRecordType.values()) { + if (r.id == id) { + return r; + } + } + return Unimplemented; + } + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java 
b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java new file mode 100644 index 0000000000..3f0b0286dc --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java @@ -0,0 +1,85 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; + +import org.apache.poi.POIXMLDocumentPart; +import org.apache.poi.POIXMLRelation; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.openxml4j.opc.PackagePartName; +import org.apache.poi.openxml4j.opc.PackageRelationship; +import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; +import org.apache.poi.openxml4j.opc.PackagingURIHelper; +import org.apache.poi.util.Internal; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; + +/** + * Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation} + * because of conflicts with regular ooxml relations. + * If we failed to break this into a separate class, in the cases of SharedStrings and Styles, + * 2 parts would exist, and "Packages shall not contain equivalent part names..." + *

+ * Also, we need to avoid the possibility of breaking the marshalling process for xml. + */ +@Internal +public class XSSFBRelation extends POIXMLRelation { + private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class); + + static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation( + "application/vnd.ms-excel.sharedStrings", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings", + "/xl/sharedStrings.bin", + null + ); + + public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation( + "application/vnd.ms-excel.styles", + PackageRelationshipTypes.STYLE_PART, + "/xl/styles.bin", + null + ); + + private XSSFBRelation(String type, String rel, String defaultName, Class cls) { + super(type, rel, defaultName, cls); + } + + /** + * Fetches the InputStream to read the contents, based + * of the specified core part, for which we are defined + * as a suitable relationship + */ + public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { + PackageRelationshipCollection prc = + corePart.getRelationshipsByType(getRelation()); + Iterator it = prc.iterator(); + if (it.hasNext()) { + PackageRelationship rel = it.next(); + PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); + PackagePart part = corePart.getPackage().getPart(relName); + return part.getInputStream(); + } + log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); + return null; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java new file mode 100644 index 0000000000..e9ba59a4ea --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java @@ -0,0 +1,47 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.util.Internal; + +@Internal +class XSSFBRichStr { + + public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException { + byte first = bytes[offset]; + boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1? + boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1? + StringBuilder sb = new StringBuilder(); + + int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb); + //TODO: parse phonetic strings. 
+ return new XSSFBRichStr(sb.toString(), ""); + } + + private final String string; + private final String phoneticString; + + XSSFBRichStr(String string, String phoneticString) { + this.string = string; + this.phoneticString = phoneticString; + } + + public String getString() { + return string; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java new file mode 100644 index 0000000000..1fb5b54ae8 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichTextString.java @@ -0,0 +1,80 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.ss.usermodel.Font; +import org.apache.poi.util.Internal; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; + +/** + * Wrapper class around String so that we can use it in Comment. + * Nothing has been implemented yet except for {@link #getString()}. 
+ */ +@Internal +class XSSFBRichTextString extends XSSFRichTextString { + private final String string; + + XSSFBRichTextString(String string) { + this.string = string; + } + + @Override + public void applyFont(int startIndex, int endIndex, short fontIndex) { + + } + + @Override + public void applyFont(int startIndex, int endIndex, Font font) { + + } + + @Override + public void applyFont(Font font) { + + } + + @Override + public void clearFormatting() { + + } + + @Override + public String getString() { + return string; + } + + @Override + public int length() { + return string.length(); + } + + @Override + public int numFormattingRuns() { + return 0; + } + + @Override + public int getIndexOfFormattingRun(int index) { + return 0; + } + + @Override + public void applyFont(short fontIndex) { + + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java new file mode 100644 index 0000000000..49d1a46f98 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java @@ -0,0 +1,137 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xssf.binary; + + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; +import org.xml.sax.SAXException; + +@Internal +public class XSSFBSharedStringsTable { + + /** + * An integer representing the total count of strings in the workbook. This count does not + * include any numbers, it counts only the total of text strings in the workbook. + */ + private int count; + + /** + * An integer representing the total count of unique strings in the Shared String Table. + * A string is unique even if it is a copy of another string, but has different formatting applied + * at the character level. + */ + private int uniqueCount; + + /** + * The shared strings table. + */ + private List strings = new ArrayList(); + + /** + * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table. + * @throws IOException If reading the data from the package fails. + * @throws SAXException if parsing the XML data fails. + */ + public XSSFBSharedStringsTable(OPCPackage pkg) + throws IOException, SAXException { + ArrayList parts = + pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType()); + + // Some workbooks have no shared strings table. 
+ if (parts.size() > 0) { + PackagePart sstPart = parts.get(0); + + readFrom(sstPart.getInputStream()); + } + } + + /** + * Like POIXMLDocumentPart constructor + * + * @since POI 3.14-Beta3 + */ + XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException { + readFrom(part.getInputStream()); + } + + private void readFrom(InputStream inputStream) throws IOException { + SSTBinaryReader reader = new SSTBinaryReader(inputStream); + reader.parse(); + } + + public List getItems() { + return strings; + } + + public String getEntryAt(int i) { + return strings.get(i); + } + + /** + * Return an integer representing the total count of strings in the workbook. This count does not + * include any numbers, it counts only the total of text strings in the workbook. + * + * @return the total count of strings in the workbook + */ + public int getCount() { + return this.count; + } + + /** + * Returns an integer representing the total count of unique strings in the Shared String Table. + * A string is unique even if it is a copy of another string, but has different formatting applied + * at the character level. 
+ * + * @return the total count of unique strings in the workbook + */ + public int getUniqueCount() { + return this.uniqueCount; + } + + private class SSTBinaryReader extends XSSFBParser { + + SSTBinaryReader(InputStream is) { + super(is); + } + + @Override + public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { + XSSFBRecordType type = XSSFBRecordType.lookup(recordType); + + switch (type) { + case BrtSstItem: + XSSFBRichStr rstr = XSSFBRichStr.build(data, 0); + strings.add(rstr.getString()); + break; + case BrtBeginSst: + count = (int) LittleEndian.getUInt(data,0); + uniqueCount = (int) LittleEndian.getUInt(data, 4); + break; + } + + } + } + +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java new file mode 100644 index 0000000000..ca5dab5a38 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java @@ -0,0 +1,329 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + + +import java.io.InputStream; +import java.util.Queue; + +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; +import org.apache.poi.xssf.usermodel.XSSFComment; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; + +@Internal +public class XSSFBSheetHandler extends XSSFBParser { + + private final static int CHECK_ALL_ROWS = -1; + + private final XSSFBSharedStringsTable stringsTable; + private final XSSFSheetXMLHandler.SheetContentsHandler handler; + private final XSSFBStylesTable styles; + private final XSSFBCommentsTable comments; + private final DataFormatter dataFormatter; + private final boolean formulasNotResults;//TODO: implement this + + private int lastEndedRow = -1; + private int lastStartedRow = -1; + private int currentRow = 0; + private byte[] rkBuffer = new byte[8]; + private XSSFBCellRange hyperlinkCellRange = null; + private StringBuilder xlWideStringBuffer = new StringBuilder(); + + private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader(); + public XSSFBSheetHandler(InputStream is, + XSSFBStylesTable styles, + XSSFBCommentsTable comments, + XSSFBSharedStringsTable strings, + XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, + DataFormatter dataFormatter, + boolean formulasNotResults) { + super(is); + this.styles = styles; + this.comments = comments; + this.stringsTable = strings; + this.handler = sheetContentsHandler; + this.dataFormatter = dataFormatter; + this.formulasNotResults = formulasNotResults; + } + + @Override + public void handleRecord(int id, byte[] data) throws XSSFBParseException { + XSSFBRecordType type = XSSFBRecordType.lookup(id); + + switch(type) { + case BrtRowHdr: + long rw = LittleEndian.getUInt(data, 0); + 
if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec? + throw new XSSFBParseException("Row number beyond allowable range: "+rw); + } + currentRow = (int)rw; + checkMissedComments(currentRow); + startRow(currentRow); + break; + case BrtCellIsst: + handleBrtCellIsst(data); + break; + case BrtCellSt: //TODO: needs test + handleCellSt(data); + break; + case BrtCellRk: + handleCellRk(data); + break; + case BrtCellReal: + handleCellReal(data); + break; + case BrtCellBool: + handleBoolean(data); + break; + case BrtCellError: + handleCellError(data); + break; + case BrtCellBlank: + beforeCellValue(data);//read cell info and check for missing comments + break; + case BrtFmlaString: + handleFmlaString(data); + break; + case BrtFmlaNum: + handleFmlaNum(data); + break; + case BrtFmlaError: + handleFmlaError(data); + break; + //TODO: All the PCDI and PCDIA + case BrtEndSheetData: + checkMissedComments(CHECK_ALL_ROWS); + endRow(lastStartedRow); + break; + case BrtBeginHeaderFooter: + handleHeaderFooter(data); + break; + } + } + + + private void beforeCellValue(byte[] data) { + XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer); + checkMissedComments(currentRow, cellBuffer.getColNum()); + } + + private void handleCellValue(String formattedValue) { + CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum()); + XSSFBComment comment = null; + if (comments != null) { + comment = comments.get(cellAddress); + } + handler.cell(cellAddress.formatAsString(), formattedValue, comment); + } + + private void handleFmlaNum(byte[] data) { + beforeCellValue(data); + //xNum + double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); + String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); + String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); + handleCellValue(formattedVal); + } + + private void handleCellSt(byte[] data) { + beforeCellValue(data); + 
xlWideStringBuffer.setLength(0); + XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); + handleCellValue(xlWideStringBuffer.toString()); + } + + private void handleFmlaString(byte[] data) { + beforeCellValue(data); + xlWideStringBuffer.setLength(0); + XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); + handleCellValue(xlWideStringBuffer.toString()); + } + + private void handleCellError(byte[] data) { + beforeCellValue(data); + //TODO, read byte to figure out the type of error + handleCellValue("ERROR"); + } + + private void handleFmlaError(byte[] data) { + beforeCellValue(data); + //TODO, read byte to figure out the type of error + handleCellValue("ERROR"); + } + + private void handleBoolean(byte[] data) { + beforeCellValue(data); + String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE"; + handleCellValue(formattedVal); + } + + private void handleCellReal(byte[] data) { + beforeCellValue(data); + //xNum + double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); + String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); + String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); + handleCellValue(formattedVal); + } + + private void handleCellRk(byte[] data) { + beforeCellValue(data); + double val = rkNumber(data, XSSFBCellHeader.length); + String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); + String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); + handleCellValue(formattedVal); + } + + private void handleBrtCellIsst(byte[] data) { + beforeCellValue(data); + long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length); + //check for out of range, buffer overflow + + XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx)); + handleCellValue(rtss.getString()); + } + + + private void handleHeaderFooter(byte[] data) 
{ + XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data); + outputHeaderFooter(headerFooter.getHeader()); + outputHeaderFooter(headerFooter.getFooter()); + outputHeaderFooter(headerFooter.getHeaderEven()); + outputHeaderFooter(headerFooter.getFooterEven()); + outputHeaderFooter(headerFooter.getHeaderFirst()); + outputHeaderFooter(headerFooter.getFooterFirst()); + } + + private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) { + String text = headerFooter.getString(); + if (text != null && text.trim().length() > 0) { + handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel()); + } + } + + + //at start of next cell or end of row, return the cellAddress if it equals currentRow and col + private void checkMissedComments(int currentRow, int colNum) { + if (comments == null) { + return; + } + Queue queue = comments.getAddresses(); + while (queue.size() > 0) { + CellAddress cellAddress = queue.peek(); + if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) { + cellAddress = queue.remove(); + dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); + } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) { + queue.remove(); + return; + } else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) { + return; + } else if (cellAddress.getRow() > currentRow) { + return; + } + } + } + + //check for anything from rows before + private void checkMissedComments(int currentRow) { + if (comments == null) { + return; + } + Queue queue = comments.getAddresses(); + int lastInterpolatedRow = -1; + while (queue.size() > 0) { + CellAddress cellAddress = queue.peek(); + if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) { + cellAddress = queue.remove(); + if (cellAddress.getRow() != lastInterpolatedRow) { + startRow(cellAddress.getRow()); + } + dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); + lastInterpolatedRow = 
cellAddress.getRow(); + } else { + break; + } + } + + } + + private void startRow(int row) { + if (row == lastStartedRow) { + return; + } + + if (lastStartedRow != lastEndedRow) { + endRow(lastStartedRow); + } + handler.startRow(row); + lastStartedRow = row; + } + + private void endRow(int row) { + if (lastEndedRow == row) { + return; + } + handler.endRow(row); + lastEndedRow = row; + } + + private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) { + handler.cell(cellAddress.formatAsString(), null, comment); + } + + private double rkNumber(byte[] data, int offset) { + //see 2.5.122 for this abomination + byte b0 = data[offset]; + String s = Integer.toString(b0, 2); + boolean numDivBy100 = ((b0 & 1) == 1); // else as is + boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer + + //unset lowest 2 bits (the two flag bits read above) + b0 &= ~1; + b0 &= ~(1<<1); + + rkBuffer[4] = b0; + for (int i = 1; i < 4; i++) { + rkBuffer[i+4] = data[offset+i]; + } + double d = 0.0; + if (floatingPoint) { + d = LittleEndian.getDouble(rkBuffer); + } else { + d = LittleEndian.getInt(rkBuffer); + } + d = (numDivBy100) ? d/100 : d; + return d; + } + + /** + * You need to implement this to handle the results + * of the sheet parsing. 
+ */ + public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler { + /** + * A cell, with the given formatted value (may be null), + * a url (may be null), a toolTip (may be null) + * and possibly a comment (may be null), was encountered */ + void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment); + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java new file mode 100644 index 0000000000..8584e95330 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java @@ -0,0 +1,101 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.poi.POIXMLException; +import org.apache.poi.ss.usermodel.BuiltinFormats; +import org.apache.poi.util.Internal; + +@Internal +public class XSSFBStylesTable extends XSSFBParser { + + private final SortedMap numberFormats = new TreeMap(); + private final List styleIds = new ArrayList(); + + private boolean inCellXFS = false; + private boolean inFmts = false; + public XSSFBStylesTable(InputStream is) throws IOException { + super(is); + parse(); + } + + String getNumberFormatString(int idx) { + if (numberFormats.containsKey(styleIds.get((short)idx))) { + return numberFormats.get(styleIds.get((short)idx)); + } + + return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx)); + } + + @Override + public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { + XSSFBRecordType type = XSSFBRecordType.lookup(recordType); + switch (type) { + case BrtBeginCellXFs: + inCellXFS = true; + break; + case BrtEndCellXFs: + inCellXFS = false; + break; + case BrtXf: + if (inCellXFS) { + handleBrtXFInCellXF(data); + } + break; + case BrtBeginFmts: + inFmts = true; + break; + case BrtEndFmts: + inFmts = false; + break; + case BrtFmt: + if (inFmts) { + handleFormat(data); + } + break; + + } + } + + private void handleFormat(byte[] data) { + int ifmt = data[0] & 0xFF; + if (ifmt > Short.MAX_VALUE) { + throw new POIXMLException("Format id must be a short"); + } + StringBuilder sb = new StringBuilder(); + XSSFBUtils.readXLWideString(data, 2, sb); + String fmt = sb.toString(); + numberFormats.put((short)ifmt, fmt); + } + + private void handleBrtXFInCellXF(byte[] data) { + int ifmtOffset = 2; + //int ifmtLength = 2; + + //numFmtId in xml terms + int ifmt = data[ifmtOffset] & 
0xFF;//the second byte is ignored + styleIds.add((short)ifmt); + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java new file mode 100644 index 0000000000..e3a46b0f04 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java @@ -0,0 +1,108 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + + +import java.nio.charset.Charset; + +import org.apache.poi.POIXMLException; +import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; + +@Internal +public class XSSFBUtils { + + /** + * Reads an XLNullableWideString. + * @param data data from which to read + * @param offset in data from which to start + * @param sb buffer to which to write. You must setLength(0) before calling! 
+ * @return number of bytes read + * @throws XSSFBParseException if there was an exception during reading + */ + static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { + long numChars = LittleEndian.getUInt(data, offset); + if (numChars < 0) { + throw new XSSFBParseException("too few chars to read"); + } else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!! + return 0; + } else if (numChars > 0xFFFFFFFFL) { + throw new XSSFBParseException("too many chars to read"); + } + + int numBytes = 2*(int)numChars; + offset += 4; + if (offset+numBytes > data.length) { + throw new XSSFBParseException("trying to read beyond data length:" + + "offset="+offset+", numBytes="+numBytes+", data.length="+data.length); + } + sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); + numBytes+=4; + return numBytes; + } + + + /** + * Reads an XLWideString. + * @param data data from which to read + * @param offset in data from which to start + * @param sb buffer to which to write. You must setLength(0) before calling! 
+ * @return number of bytes read + * @throws XSSFBParseException if there was an exception while trying to read the string + */ + public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { + long numChars = LittleEndian.getUInt(data, offset); + if (numChars < 0) { + throw new XSSFBParseException("too few chars to read"); + } else if (numChars > 0xFFFFFFFFL) { + throw new XSSFBParseException("too many chars to read"); + } + int numBytes = 2*(int)numChars; + offset += 4; + if (offset+numBytes > data.length) { + throw new XSSFBParseException("trying to read beyond data length"); + } + sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); + numBytes+=4; + return numBytes; + } + + static int castToInt(long val) { + if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) { + return (int)val; + } + throw new POIXMLException("val ("+val+") can't be cast to int"); + } + + static short castToShort(int val) { + if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) { + return (short)val; + } + throw new POIXMLException("val ("+val+") can't be cast to short"); + + } + + //TODO: move to LittleEndian? + static int get24BitInt( byte[] data, int offset) { + int i = offset; + int b0 = data[i++] & 0xFF; + int b1 = data[i++] & 0xFF; + int b2 = data[i] & 0xFF; + return ( b2 << 16 ) + ( b1 << 8 ) + b0; + } +} diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java new file mode 100644 index 0000000000..a02e8ce922 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java @@ -0,0 +1,117 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import org.apache.poi.ss.util.CellRangeAddress; +import org.apache.poi.util.Internal; + +/** + * This is a read-only record that maintains information about + * a hyperlink. In OOXML land, this information has to be merged + * from 1) the sheet's .rels to get the url and 2) from after the + * sheet data in the hyperlink section. + * + * The {@link #display} is often empty and should be filled from + * the contents of the anchor cell. 
+ *
+ */
+@Internal
+public class XSSFHyperlinkRecord {
+
+    //anchor range and relationship id are fixed at construction time
+    private final CellRangeAddress cellRangeAddress;
+    private final String relId;
+    //the remaining fields may be filled in later through the package-private setters
+    private String location;
+    private String toolTip;
+    private String display;
+
+    XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
+        this.cellRangeAddress = cellRangeAddress;
+        this.relId = relId;
+        this.location = location;
+        this.toolTip = toolTip;
+        this.display = display;
+    }
+
+    void setLocation(String location) {
+        this.location = location;
+    }
+
+    void setToolTip(String toolTip) {
+        this.toolTip = toolTip;
+    }
+
+    void setDisplay(String display) {
+        this.display = display;
+    }
+
+    CellRangeAddress getCellRangeAddress() {
+        return cellRangeAddress;
+    }
+
+    public String getRelId() {
+        return relId;
+    }
+
+    public String getLocation() {
+        return location;
+    }
+
+    public String getToolTip() {
+        return toolTip;
+    }
+
+    public String getDisplay() {
+        return display;
+    }
+
+    /**
+     * Two records are equal when they are of the same class and all five
+     * fields are pairwise equal (null matches only null).
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        XSSFHyperlinkRecord other = (XSSFHyperlinkRecord) o;
+        return sameOrBothNull(cellRangeAddress, other.cellRangeAddress)
+                && sameOrBothNull(relId, other.relId)
+                && sameOrBothNull(location, other.location)
+                && sameOrBothNull(toolTip, other.toolTip)
+                && sameOrBothNull(display, other.display);
+    }
+
+    /**
+     * Combines the field hashes with the usual multiply-by-31 scheme,
+     * seeded with the hash of the cell range; null fields contribute 0.
+     */
+    @Override
+    public int hashCode() {
+        int hash = hashOrZero(cellRangeAddress);
+        hash = 31 * hash + hashOrZero(relId);
+        hash = 31 * hash + hashOrZero(location);
+        hash = 31 * hash + hashOrZero(toolTip);
+        hash = 31 * hash + hashOrZero(display);
+        return hash;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("XSSFHyperlinkRecord{");
+        sb.append("cellRangeAddress=").append(cellRangeAddress);
+        sb.append(", relId='").append(relId).append('\'');
+        sb.append(", location='").append(location).append('\'');
+        sb.append(", toolTip='").append(toolTip).append('\'');
+        sb.append(", display='").append(display).append('\'');
+        sb.append('}');
+        return sb.toString();
+    }
+
+    //null-safe equality: a null value only matches another null
+    private static boolean sameOrBothNull(Object a, Object b) {
+        return a != null ? a.equals(b) : b == null;
+    }
+
+    //null-safe hash: null contributes 0
+    private static int hashOrZero(Object o) {
+        return o != null ? o.hashCode() : 0;
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/package.html b/src/ooxml/java/org/apache/poi/xssf/binary/package.html
new file mode 100644
index 0000000000..c7e4a018bc
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/binary/package.html
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+
+

The org.apache.poi.xssf.binary package includes necessary underlying components +for streaming/read-only processing of xlsb files. +

+

+ POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader + in o.a.p.xssf.eventusermodel. +

+

+ This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes + have been marked @Internal and the API is subject to change. +

+

Related Documentation

+ +For overviews, tutorials, examples, guides, and tool documentation, please see: + + + + diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java new file mode 100644 index 0000000000..b8f54cdf53 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java @@ -0,0 +1,172 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBParseException;
+import org.apache.poi.xssf.binary.XSSFBParser;
+import org.apache.poi.xssf.binary.XSSFBRecordType;
+import org.apache.poi.xssf.binary.XSSFBRelation;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.binary.XSSFBUtils;
+import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+/**
+ * Reader for xlsb files.
+ */
+public class XSSFBReader extends XSSFReader {
+    /**
+     * Creates a new XSSFBReader, for the given package
+     *
+     * @param pkg opc package
+     */
+    public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
+        super(pkg);
+    }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     *  different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     *  from the Iterator. It's up to you to close the
+     *  InputStreams when done with each one.
+     */
+    @Override
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        return new SheetIterator(workbookPart);
+    }
+
+    /**
+     * Loads the binary styles part of the package.
+     *
+     * @return the styles table built from the first part with the binary
+     *         styles content type, or null if the package has no such part
+     */
+    public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
+        ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
+        if (parts.isEmpty()) {
+            return null;
+        }
+
+        // Only the first styles part is used
+        return new XSSFBStylesTable(parts.get(0).getInputStream());
+
+    }
+
+
+    public static class SheetIterator extends XSSFReader.SheetIterator {
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding workbook.xml
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+            super(wb);
+        }
+
+        /**
+         * Reads the sheet references from the binary workbook part instead of
+         * parsing workbook.xml. Note that the superclass constructor invokes
+         * this method, so it must not rely on fields of this subclass.
+         */
+        @Override
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+            SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream());
+            sheetRefLoader.parse();
+            return sheetRefLoader.getSheets().iterator();
+        }
+
+        /**
+         * Not supported by XSSFBReader's SheetIterator.
+         * Please use {@link #getXSSFBSheetComments()} instead.
+         * @return nothing, always throws IllegalArgumentException!
+         */
+        @Override
+        public CommentsTable getSheetComments() {
+            throw new IllegalArgumentException("Please use getXSSFBSheetComments");
+        }
+
+        /**
+         * Returns the comments of the current sheet.
+         *
+         * @return the comments table, or null if the sheet has no comments
+         *         relationship or the comments part could not be read
+         *         (read failures are deliberately treated as "no comments")
+         */
+        public XSSFBCommentsTable getXSSFBSheetComments() {
+            PackagePart sheetPkg = getSheetPart();
+
+            // Do we have a comments relationship? (Only ever one if so)
+            try {
+                PackageRelationshipCollection commentsList =
+                        sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+                if (commentsList.size() > 0) {
+                    PackageRelationship comments = commentsList.getRelationship(0);
+                    if (comments == null || comments.getTargetURI() == null) {
+                        return null;
+                    }
+                    PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
+                    PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
+                    return new XSSFBCommentsTable(commentsPart.getInputStream());
+                }
+            } catch (InvalidFormatException e) {
+                return null;
+            } catch (IOException e) {
+                return null;
+            }
+            return null;
+        }
+
+    }
+
+    //collects the sheet references (relationship id + name) from the BrtBundleSh
+    //records of the binary workbook part, in the order they appear
+    private static class SheetRefLoader extends XSSFBParser {
+        private final List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>();
+
+        private SheetRefLoader(InputStream is) {
+            super(is);
+        }
+
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
+                addWorksheet(data);
+            }
+        }
+
+        private void addWorksheet(byte[] data) {
+            int offset = 0;
+            //skip the sheet state #2.5.142; it is not needed for text extraction
+            offset += LittleEndian.INT_SIZE;
+
+            long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
+            //according to #2.4.304
+            if (iTabID < 1 || iTabID > 0x0000FFFFL) {
+                throw new XSSFBParseException("table id out of range: "+iTabID);
+            }
+            //the relationship id comes first, immediately followed by the sheet name
+            StringBuilder sb = new StringBuilder();
+            offset += XSSFBUtils.readXLWideString(data, offset, sb);
+            String relId = sb.toString();
+            sb.setLength(0);
+            XSSFBUtils.readXLWideString(data, offset, sb);
+            String name = sb.toString();
+            //sheets without a relationship id are silently skipped
+            if (relId != null && relId.trim().length() > 0) {
+                sheets.add(new XSSFSheetRef(relId, name));
+            }
+        }
+
+        List<XSSFSheetRef> getSheets() {
+            return sheets;
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java index e5c9cb25b1..5b43c20101 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java @@ -16,15 +16,16 @@ ==================================================================== */ package org.apache.poi.xssf.eventusermodel; -import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; - +import javax.xml.parsers.ParserConfigurationException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; import org.apache.poi.POIXMLException; @@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; +import org.apache.poi.util.SAXHelper; import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; @@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSSFDrawing; import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.poi.xssf.usermodel.XSSFShape; import org.apache.xmlbeans.XmlException; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; +import org.xml.sax.Attributes; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.XMLReader; +import org.xml.sax.helpers.DefaultHandler; /** * This class makes it easy to get at individual parts @@ -62,8 +66,8 @@ public class XSSFReader { private static final POILogger LOGGER = 
POILogFactory.getLogger(XSSFReader.class); - private OPCPackage pkg; - private PackagePart workbookPart; + protected OPCPackage pkg; + protected PackagePart workbookPart; /** * Creates a new XSSFReader, for the given package @@ -194,23 +198,23 @@ public class XSSFReader { private final Map sheetMap; /** - * Current CTSheet bean + * Current sheet reference */ - private CTSheet ctSheet; - + XSSFSheetRef xssfSheetRef; + /** * Iterator over CTSheet objects, returns sheets in logical order. * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, * i.e. as they are stored in the underlying package */ - private final Iterator sheetIterator; + final Iterator sheetIterator; /** * Construct a new SheetIterator * * @param wb package part holding workbook.xml */ - private SheetIterator(PackagePart wb) throws IOException { + SheetIterator(PackagePart wb) throws IOException { /** * The order of sheets is defined by the order of CTSheet elements in workbook.xml @@ -228,25 +232,44 @@ public class XSSFReader { sheetMap.put(rel.getId(), pkg.getPart(relName)); } } - //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator - //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search - CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook(); - List validSheets = new ArrayList(); - for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) { - //if there's no relationship id, silently skip the sheet - String sheetId = ctSheet.getId(); - if (sheetId != null && sheetId.length() > 0) { - validSheets.add(ctSheet); - } - } - sheetIterator = validSheets.iterator(); + //step 2. 
Read array of CTSheet elements, wrap it in a LinkedList
+                //and construct an iterator
+                sheetIterator = createSheetIteratorFromWB(wb);
             } catch (InvalidFormatException e){
                 throw new POIXMLException(e);
-            } catch (XmlException e){
+            }
+        }
+
+        /**
+         * Parses the workbook part with SAX and returns the sheet references
+         * that carry a relationship id, in document order.
+         *
+         * @param wb package part holding workbook.xml
+         * @throws IOException if the part cannot be read
+         * @throws POIXMLException if no parser can be created or the xml is malformed
+         */
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+
+            XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+            XMLReader xmlReader = null;
+            try {
+                xmlReader = SAXHelper.newXMLReader();
+            } catch (ParserConfigurationException e) {
+                throw new POIXMLException(e);
+            } catch (SAXException e) {
                 throw new POIXMLException(e);
             }
+            xmlReader.setContentHandler(xmlSheetRefReader);
+            //we opened the stream, so we close it, even when parsing fails
+            InputStream wbStream = wb.getInputStream();
+            try {
+                xmlReader.parse(new InputSource(wbStream));
+            } catch (SAXException e) {
+                throw new POIXMLException(e);
+            } finally {
+                wbStream.close();
+            }
+
+            List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
+            for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
+                //if there's no relationship id, silently skip the sheet
+                String sheetId = xssfSheetRef.getId();
+                if (sheetId != null && sheetId.length() > 0) {
+                    validSheets.add(xssfSheetRef);
+                }
+            }
+            return validSheets.iterator();
         }
 
+
         /**
          * Returns true if the iteration has more elements.
*
@@ -264,9 +287,9 @@ public class XSSFReader {
          */
         @Override
         public InputStream next() {
-            ctSheet = sheetIterator.next();
+            xssfSheetRef = sheetIterator.next();
 
-            String sheetId = ctSheet.getId();
+            String sheetId = xssfSheetRef.getId();
             try {
                 PackagePart sheetPkg = sheetMap.get(sheetId);
                 return sheetPkg.getInputStream();
@@ -281,7 +304,7 @@ public class XSSFReader {
          * @return name of the current sheet
          */
         public String getSheetName() {
-            return ctSheet.getName();
+            return xssfSheetRef.getName();
         }
 
         /**
@@ -344,7 +367,7 @@ public class XSSFReader {
         }
 
         public PackagePart getSheetPart() {
-            String sheetId = ctSheet.getId();
+            String sheetId = xssfSheetRef.getId();
             return sheetMap.get(sheetId);
         }
 
@@ -356,4 +379,52 @@ public class XSSFReader {
             throw new IllegalStateException("Not supported");
         }
     }
+
+    /**
+     * Immutable pair of a sheet's relationship id and its display name.
+     */
+    protected final static class XSSFSheetRef {
+        //do we need to store sheetId, too?
+        private final String id;
+        private final String name;
+
+        public XSSFSheetRef(String id, String name) {
+            this.id = id;
+            this.name = name;
+        }
+
+        public String getId() {
+            return id;
+        }
+
+        public String getName() {
+            return name;
+        }
+    }
+
+    //scrapes sheet reference info and order from workbook.xml
+    private static class XMLSheetRefReader extends DefaultHandler {
+        private final static String SHEET = "sheet";
+        private final static String ID = "id";
+        private final static String NAME = "name";
+
+        private final List<XSSFSheetRef> sheetRefs = new LinkedList<XSSFSheetRef>();
+
+        /**
+         * Records exactly one sheet reference per sheet element. The id and/or
+         * name are null when the element lacks the corresponding attribute.
+         */
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
+            if (localName.toLowerCase(Locale.US).equals(SHEET)) {
+                String name = null;
+                String id = null;
+                for (int i = 0; i < attrs.getLength(); i++) {
+                    if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
+                        name = attrs.getValue(i);
+                    } else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
+                        id = attrs.getValue(i);
+                    }
+                }
+                //add once, after all attributes have been seen; adding inside the
+                //loop produced one (partially filled) reference per attribute and
+                //duplicate sheets whenever the id attribute was not the last one
+                sheetRefs.add(new XSSFSheetRef(id, name));
+            }
+        }
+
+        List<XSSFSheetRef> getSheetRefs() {
+
return Collections.unmodifiableList(sheetRefs); + } + } } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java new file mode 100644 index 0000000000..b3e667e4a7 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java @@ -0,0 +1,160 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xssf.extractor; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.xssf.binary.XSSFBCommentsTable; +import org.apache.poi.xssf.binary.XSSFBHyperlinksTable; +import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; +import org.apache.poi.xssf.binary.XSSFBSheetHandler; +import org.apache.poi.xssf.binary.XSSFBStylesTable; +import org.apache.poi.xssf.eventusermodel.XSSFBReader; +import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; +import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.xmlbeans.XmlException; +import org.xml.sax.SAXException; + +/** + * Implementation of a text extractor for xlsb Excel + * files that uses SAX-like binary parsing. 
+ */
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
+        implements org.apache.poi.ss.extractor.ExcelExtractor {
+
+    public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+            XSSFRelation.XLSB_BINARY_WORKBOOK
+    };
+
+    private boolean handleHyperlinksInCells = false;
+
+    public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+        super(path);
+    }
+
+    public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+        super(container);
+    }
+
+    /**
+     * Command line entry point: prints the extracted text of the given xlsb file.
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  XSSFBEventBasedExcelExtractor <filename.xlsb>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+                new XSSFBEventBasedExcelExtractor(args[0]);
+        try {
+            System.out.println(extractor.getText());
+        } finally {
+            //release the package even if extraction fails
+            extractor.close();
+        }
+    }
+
+    public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
+        this.handleHyperlinksInCells = handleHyperlinksInCells;
+    }
+
+    /**
+     * Should we return the formula itself, and not
+     * the result it produces? Default is false
+     * This is currently unsupported for xssfb
+     */
+    @Override
+    public void setFormulasNotResults(boolean formulasNotResults) {
+        throw new IllegalArgumentException("Not currently supported");
+    }
+
+    /**
+     * Processes the given sheet
+     */
+    public void processSheet(
+            SheetContentsHandler sheetContentsExtractor,
+            XSSFBStylesTable styles,
+            XSSFBCommentsTable comments,
+            XSSFBSharedStringsTable strings,
+            InputStream sheetInputStream)
+            throws IOException, SAXException {
+
+        //fall back to the default-locale formatter when no locale was set
+        DataFormatter formatter;
+        if (locale == null) {
+            formatter = new DataFormatter();
+        } else {
+            formatter = new DataFormatter(locale);
+        }
+
+        XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
+                sheetInputStream,
+                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
+        );
+        xssfbSheetHandler.parse();
+    }
+
+    /**
+     * Processes the file and returns the text
+     *
+     * @return the text of all sheets, or null if the file could not be processed
+     */
+    public String getText() {
+        try {
+            XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
+            XSSFBReader xssfbReader = new XSSFBReader(container);
+            XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
+            XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
+
+            StringBuffer text = new StringBuffer();
+            SheetTextExtractor sheetExtractor = new SheetTextExtractor();
+            XSSFBHyperlinksTable hyperlinksTable = null;
+            while (iter.hasNext()) {
+                InputStream stream = iter.next();
+                try {
+                    if (includeSheetNames) {
+                        text.append(iter.getSheetName());
+                        text.append('\n');
+                    }
+                    if (handleHyperlinksInCells) {
+                        //TODO: the table is loaded but not yet handed to the sheet handler
+                        hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
+                    }
+                    XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
+                    processSheet(sheetExtractor, styles, comments, strings, stream);
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendHeaderText(text);
+                    }
+                    sheetExtractor.appendCellText(text);
+                    if (includeTextBoxes) {
+                        processShapes(iter.getShapes(), text);
+                    }
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendFooterText(text);
+                    }
+                    sheetExtractor.reset();
+                } finally {
+                    //the iterator leaves closing to us; do it even when a sheet fails
+                    stream.close();
+                }
+            }
+
+            return text.toString();
+        } catch (IOException e) {
+            System.err.println(e);
+            return null;
+        } catch (SAXException se) {
+            System.err.println(se);
+            return null;
+        } catch (OpenXML4JException o4je) {
+            System.err.println(o4je);
+            return null;
+        }
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
index e49c11c2ea..2cfa099d9d 100644
--- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
@@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
  */
 public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
        implements org.apache.poi.ss.extractor.ExcelExtractor {
-    private OPCPackage container;
+    OPCPackage container;
     private POIXMLProperties properties;
 
-    private Locale locale;
-    private boolean includeTextBoxes = true;
-    private boolean includeSheetNames = true;
-    private boolean includeCellComments = false;
-    private boolean includeHeadersFooters = true;
-    private boolean formulasNotResults = false;
+    Locale locale;
+    boolean includeTextBoxes = true;
+    boolean includeSheetNames = true;
+    boolean includeCellComments = false;
+    boolean includeHeadersFooters = true;
+    boolean formulasNotResults = false;
     private boolean concatenatePhoneticRuns = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtractor
extends POIXMLTextExtractor } } - private void processShapes(List shapes, StringBuffer text) { + void processShapes(List shapes, StringBuffer text) { if (shapes == null){ return; } @@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor * @see XSSFExcelExtractor#getText() * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) */ - private void appendHeaderText(StringBuffer buffer) { + void appendHeaderText(StringBuffer buffer) { appendHeaderFooterText(buffer, "firstHeader"); appendHeaderFooterText(buffer, "oddHeader"); appendHeaderFooterText(buffer, "evenHeader"); @@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor * @see XSSFExcelExtractor#getText() * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) */ - private void appendFooterText(StringBuffer buffer) { + void appendFooterText(StringBuffer buffer) { // append the text for each footer type in the same order // they are appended in XSSFExcelExtractor appendHeaderFooterText(buffer, "firstFooter"); @@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor /** * Append the cell contents we have collected. */ - private void appendCellText(StringBuffer buffer) { + void appendCellText(StringBuffer buffer) { checkMaxTextSize(buffer, output.toString()); buffer.append(output); } @@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor /** * Reset this SheetTextExtractor for the next sheet. 
*/ - private void reset() { + void reset() { output.setLength(0); firstCellOfRow = true; if (headerFooterMap != null) { diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 0d7bc5a8a3..8405447c00 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -68,6 +68,7 @@ public class TestExtractorFactory { private static File xlsxStrict; private static File xltx; private static File xlsEmb; + private static File xlsb; private static File doc; private static File doc6; @@ -108,6 +109,7 @@ public class TestExtractorFactory { xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); xltx = getFileAndCheck(ssTests, "test.xltx"); xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); + xlsb = getFileAndCheck(ssTests, "testVarious.xlsb"); POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); doc = getFileAndCheck(wpTests, "SampleDoc.doc"); @@ -172,6 +174,13 @@ public class TestExtractorFactory { ); extractor.close(); + extractor = ExtractorFactory.createExtractor(xlsb); + assertTrue( + extractor.getText().contains("test") + ); + extractor.close(); + + extractor = ExtractorFactory.createExtractor(xltx); assertTrue( extractor.getText().contains("test") diff --git a/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java new file mode 100644 index 0000000000..7bf1cf391f --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java @@ -0,0 +1,56 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.binary; + +import static org.junit.Assert.assertEquals; + +import java.util.List; +import java.util.regex.Pattern; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.junit.Test; + +public class TestXSSFBSharedStringsTable { + + + private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); + + @Test + public void testBasic() throws Exception { + + OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb")); + List parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin")); + assertEquals(1, parts.size()); + + XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0)); + List strings = rtbl.getItems(); + assertEquals(49, strings.size()); + + assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0)); + assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3)); + assertEquals(55, rtbl.getCount()); + assertEquals(49, rtbl.getUniqueCount()); + + //TODO: add in tests for phonetic runs + + } + + +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java 
b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java new file mode 100644 index 0000000000..992517dfc6 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java @@ -0,0 +1,54 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.xssf.binary; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.util.List; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.xssf.eventusermodel.XSSFBReader; +import org.apache.poi.xssf.eventusermodel.XSSFReader; +import org.junit.Test; + +public class TestXSSFBSheetHyperlinkManager { + + private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); + + @Test + public void testBasic() throws Exception { + + OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb")); + XSSFBReader reader = new XSSFBReader(pkg); + XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData(); + it.next(); + XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart()); + List records = manager.getHyperLinks().get(new CellAddress(0, 0)); + assertNotNull(records); + assertEquals(1, records.size()); + XSSFHyperlinkRecord record = records.get(0); + assertEquals("http://tika.apache.org/", record.getLocation()); + assertEquals("rId2", record.getRelId()); + + } + + +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java new file mode 100644 index 0000000000..57e1e836c7 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java @@ -0,0 +1,224 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.eventusermodel; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.poi.POIDataSamples; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; +import org.apache.poi.xssf.binary.XSSFBSheetHandler; +import org.apache.poi.xssf.binary.XSSFBStylesTable; +import org.apache.poi.xssf.usermodel.XSSFComment; +import org.junit.Test; + +public class TestXSSFBReader { + + private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); + + @Test + public void testBasic() throws Exception { + List sheetTexts = getSheets("testVarious.xlsb"); + + assertEquals(1, sheetTexts.size()); + String xsxml = sheetTexts.get(0); + assertContains("This is a string", xsxml); + assertContains("13", xsxml); + assertContains("13.12112313", xsxml); + assertContains("$ 3.03", xsxml); + assertContains("20%", xsxml); + assertContains("13.12", xsxml); + assertContains("1.23457E+14", xsxml); + assertContains("1.23457E+15", xsxml); + + assertContains("46/1963", xsxml);//custom format 1 + assertContains("3/128", xsxml);//custom format 2 + + 
assertContains("\n" + + "\tlonger int\n" + + "\t1.23457E+15\n" + + "\tAllison, Timothy B.:\n" + + "test comment2\n" + + "", xsxml); + + assertContains("\n" + + "\tcomment6Allison, Timothy B.:\n" + + "comment6 actually in cell\n" + + "", xsxml); + + assertContains("\n" + + "\tAllison, Timothy B.:\n" + + "comment7 end of file\n" + + "", xsxml); + + assertContains("\n" + + "\tAllison, Timothy B.:\n" + + "comment8 end of file\n" + + "", xsxml); + + assertContains("
OddLeftHeader OddCenterHeader OddRightHeader
", xsxml); + assertContains("
OddLeftFooter OddCenterFooter OddRightFooter
", xsxml); + assertContains( + "
EvenLeftHeader EvenCenterHeader EvenRightHeader\n
", + xsxml); + assertContains( + "
EvenLeftFooter EvenCenterFooter EvenRightFooter
", + xsxml); + assertContains( + "
FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader
", + xsxml); + assertContains( + "
FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter
", + xsxml); + + } + + @Test + public void testComments() throws Exception { + List sheetTexts = getSheets("comments.xlsb"); + String xsxml = sheetTexts.get(0); + assertContains( + "\n" + + "\tcomment top row1 (index0)\n" + + "\trow1\n" + + "", xsxml); + assertContains( + "\n" + + "\tAllison, Timothy B.:\n" + + "comment row2 (index1)\n" + + "", + xsxml); + assertContains("\n" + + "\trow3comment top row3 (index2)\n" + + "\trow3\n", xsxml); + + assertContains("\n" + + "\tcomment top row4 (index3)\n" + + "\trow4\n" + + "", xsxml); + + } + + private List getSheets(String testFileName) throws Exception { + OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName)); + List sheetTexts = new ArrayList(); + XSSFBReader r = new XSSFBReader(pkg); + +// assertNotNull(r.getWorkbookData()); + // assertNotNull(r.getSharedStringsData()); + assertNotNull(r.getXSSFBStylesTable()); + XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg); + XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable(); + XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData(); + + while (it.hasNext()) { + InputStream is = it.next(); + String name = it.getSheetName(); + TestSheetHandler testSheetHandler = new TestSheetHandler(); + testSheetHandler.startSheet(name); + XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is, + xssfbStylesTable, + it.getXSSFBSheetComments(), + sst, testSheetHandler, + new DataFormatter(), + false); + sheetHandler.parse(); + testSheetHandler.endSheet(); + sheetTexts.add(testSheetHandler.toString()); + } + return sheetTexts; + + } + + //This converts all [\r\n\t]+ to " " + private void assertContains(String needle, String haystack) { + needle = needle.replaceAll("[\r\n\t]+", " "); + haystack = haystack.replaceAll("[\r\n\t]+", " "); + if (haystack.indexOf(needle) < 0) { + fail("couldn't find >"+needle+"< in: "+haystack ); + } + } + + + @Test + public void testDate() throws Exception { + List sheets = getSheets("date.xlsb"); + 
assertEquals(1, sheets.size()); + assertContains("1/12/13", sheets.get(0)); + + } + + + private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler { + private final StringBuilder sb = new StringBuilder(); + + public void startSheet(String sheetName) { + sb.append(""); + } + + public void endSheet(){ + sb.append(""); + } + @Override + public void startRow(int rowNum) { + sb.append("\n"); + } + + @Override + public void endRow(int rowNum) { + sb.append("\n"); + } + + @Override + public void cell(String cellReference, String formattedValue, XSSFComment comment) { + formattedValue = (formattedValue == null) ? "" : formattedValue; + if (comment == null) { + sb.append("\n\t").append(formattedValue).append(""); + } else { + sb.append("\n\t") + .append(formattedValue) + .append("") + .append(comment.getString().toString().trim()).append("") + .append(""); + } + } + + @Override + public void headerFooter(String text, boolean isHeader, String tagName) { + if (isHeader) { + sb.append("
"+text+"
"); + } else { + sb.append(""); + + } + } + + @Override + public String toString() { + return sb.toString(); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java new file mode 100644 index 0000000000..da38882abb --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java @@ -0,0 +1,102 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.extractor; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.poi.xssf.XSSFTestDataSamples; +import org.junit.Test; + +/** + * Tests for {@link XSSFBEventBasedExcelExtractor} + */ +public class TestXSSFBEventBasedExcelExtractor { + + + protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { + return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples. 
+ openSamplePackage(sampleName)); + } + + /** + * Get text out of the simple file + */ + @Test + public void testGetSimpleText() throws Exception { + // a very simple file + XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb"); + extractor.setIncludeCellComments(true); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check sheet names + assertTrue(text.startsWith("Sheet1")); + assertTrue(text.endsWith("Sheet3\n")); + + // Now without, will have text + extractor.setIncludeSheetNames(false); + text = extractor.getText(); + String CHUNK1 = + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n"; + String CHUNK2 = + "The quick brown fox jumps over the lazy dog\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n" + + "hello, xssf hello, xssf\n"; + assertEquals( + CHUNK1 + + "at\t4995\n" + + CHUNK2 + , text); + + } + + + /** + * Test text extraction from text box using getShapes() + * + * @throws Exception + */ + @Test + public void testShapes() throws Exception { + XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb"); + + try { + String text = ooxmlExtractor.getText(); + + assertTrue(text.indexOf("Line 1") > -1); + assertTrue(text.indexOf("Line 2") > -1); + assertTrue(text.indexOf("Line 3") > -1); + } finally { + ooxmlExtractor.close(); + } + } + +} diff --git a/test-data/spreadsheet/51519.xlsb b/test-data/spreadsheet/51519.xlsb new file mode 100644 index 0000000000..54876cdb9c Binary files /dev/null and b/test-data/spreadsheet/51519.xlsb differ diff --git a/test-data/spreadsheet/WithTextBox.xlsb b/test-data/spreadsheet/WithTextBox.xlsb new file mode 100644 index 0000000000..558395dc13 Binary files /dev/null and b/test-data/spreadsheet/WithTextBox.xlsb differ diff --git a/test-data/spreadsheet/comments.xlsb 
b/test-data/spreadsheet/comments.xlsb new file mode 100644 index 0000000000..881e51c867 Binary files /dev/null and b/test-data/spreadsheet/comments.xlsb differ diff --git a/test-data/spreadsheet/date.xlsb b/test-data/spreadsheet/date.xlsb new file mode 100644 index 0000000000..d47d602940 Binary files /dev/null and b/test-data/spreadsheet/date.xlsb differ diff --git a/test-data/spreadsheet/hyperlink.xlsb b/test-data/spreadsheet/hyperlink.xlsb new file mode 100644 index 0000000000..2a08936823 Binary files /dev/null and b/test-data/spreadsheet/hyperlink.xlsb differ diff --git a/test-data/spreadsheet/sample.xlsb b/test-data/spreadsheet/sample.xlsb new file mode 100644 index 0000000000..676b4da2d6 Binary files /dev/null and b/test-data/spreadsheet/sample.xlsb differ diff --git a/test-data/spreadsheet/testVarious.xlsb b/test-data/spreadsheet/testVarious.xlsb new file mode 100644 index 0000000000..22cc9b41d3 Binary files /dev/null and b/test-data/spreadsheet/testVarious.xlsb differ