git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_16_FINAL
@@ -56,6 +56,7 @@ import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; | |||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; | |||
import org.apache.poi.xslf.usermodel.XSLFRelation; | |||
import org.apache.poi.xslf.usermodel.XSLFSlideShow; | |||
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; | |||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; | |||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
@@ -244,6 +245,13 @@ public class ExtractorFactory { | |||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); | |||
} | |||
// How about xlsb? | |||
for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { | |||
if (rel.getContentType().equals(contentType)) { | |||
return new XSSFBEventBasedExcelExtractor(pkg); | |||
} | |||
} | |||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); | |||
} catch (IOException e) { |
@@ -19,7 +19,9 @@ package org.apache.poi.xssf; | |||
import org.apache.poi.UnsupportedFileFormatException; | |||
/** | |||
* We don't support .xlsb files, sorry | |||
* We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}. | |||
* As of POI 3.15-beta3, we do support streaming reading of xlsb files | |||
* via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader} | |||
*/ | |||
public class XLSBUnsupportedException extends UnsupportedFileFormatException { | |||
private static final long serialVersionUID = 7849681804154571175L; |
@@ -0,0 +1,71 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.ss.util.CellReference; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
/** | |||
* This class encapsulates what the spec calls a "Cell" object. | |||
* I added "Header" to clarify that this does not contain the contents | |||
* of the cell, only the column number, the style id and the phonetic boolean | |||
*/ | |||
@Internal | |||
class XSSFBCellHeader { | |||
public static int length = 8; | |||
/** | |||
* | |||
* @param data raw data | |||
* @param offset offset at which to start reading the record | |||
* @param currentRow 0-based current row count | |||
* @param cell cell buffer to update | |||
*/ | |||
public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) { | |||
long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||
int styleIdx = XSSFBUtils.get24BitInt(data, offset); offset += 3; | |||
//TODO: range checking | |||
boolean showPhonetic = false;//TODO: fill this out | |||
cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic); | |||
} | |||
private int rowNum; | |||
private int colNum; | |||
private int styleIdx; | |||
private boolean showPhonetic; | |||
public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) { | |||
this.rowNum = rowNum; | |||
this.colNum = colNum; | |||
this.styleIdx = styleIdx; | |||
this.showPhonetic = showPhonetic; | |||
} | |||
int getColNum() { | |||
return colNum; | |||
} | |||
String formatAddressAsString() { | |||
return CellReference.convertNumToColString(colNum)+(rowNum+1); | |||
} | |||
int getStyleIdx() { | |||
return styleIdx; | |||
} | |||
} |
@@ -0,0 +1,54 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
@Internal | |||
class XSSFBCellRange { | |||
public final static int length = 4* LittleEndian.INT_SIZE; | |||
/** | |||
* Parses an RfX cell range from the data starting at the offset. | |||
* This performs no range checking. | |||
* @param data raw bytes | |||
* @param offset offset at which to start reading from data | |||
* @param cellRange to overwrite. If null, a new cellRange will be created. | |||
* @return a mutable cell range. | |||
*/ | |||
public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) { | |||
if (cellRange == null) { | |||
cellRange = new XSSFBCellRange(); | |||
} | |||
cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||
cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||
cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||
cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); | |||
return cellRange; | |||
} | |||
int firstRow; | |||
int lastRow; | |||
int firstCol; | |||
int lastCol; | |||
} |
@@ -0,0 +1,112 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.ss.usermodel.ClientAnchor; | |||
import org.apache.poi.ss.usermodel.RichTextString; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||
@Internal | |||
class XSSFBComment extends XSSFComment { | |||
private final CellAddress cellAddress; | |||
private final String author; | |||
private final XSSFBRichTextString comment; | |||
private boolean visible = true; | |||
XSSFBComment(CellAddress cellAddress, String author, String comment) { | |||
super(null, null, null); | |||
this.cellAddress = cellAddress; | |||
this.author = author; | |||
this.comment = new XSSFBRichTextString(comment); | |||
} | |||
@Override | |||
public void setVisible(boolean visible) { | |||
throw new IllegalArgumentException("XSSFBComment is read only."); | |||
} | |||
@Override | |||
public boolean isVisible() { | |||
return visible; | |||
} | |||
@Override | |||
public CellAddress getAddress() { | |||
return cellAddress; | |||
} | |||
@Override | |||
public void setAddress(CellAddress addr) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public void setAddress(int row, int col) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public int getRow() { | |||
return cellAddress.getRow(); | |||
} | |||
@Override | |||
public void setRow(int row) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public int getColumn() { | |||
return cellAddress.getColumn(); | |||
} | |||
@Override | |||
public void setColumn(int col) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public String getAuthor() { | |||
return author; | |||
} | |||
@Override | |||
public void setAuthor(String author) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public XSSFBRichTextString getString() { | |||
return comment; | |||
} | |||
@Override | |||
public void setString(RichTextString string) { | |||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||
} | |||
@Override | |||
public ClientAnchor getClientAnchor() { | |||
return null; | |||
} | |||
} |
@@ -0,0 +1,113 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.Comparator; | |||
import java.util.LinkedList; | |||
import java.util.List; | |||
import java.util.Map; | |||
import java.util.Queue; | |||
import java.util.TreeMap; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
@Internal | |||
public class XSSFBCommentsTable extends XSSFBParser { | |||
private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//String is the cellAddress A1 | |||
private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>(); | |||
private List<String> authors = new ArrayList<String>(); | |||
//these are all used only during parsing, and they are mutable! | |||
private int authorId = -1; | |||
private CellAddress cellAddress = null; | |||
private XSSFBCellRange cellRange = null; | |||
private String comment = null; | |||
private StringBuilder authorBuffer = new StringBuilder(); | |||
public XSSFBCommentsTable(InputStream is) throws IOException { | |||
super(is); | |||
parse(); | |||
commentAddresses.addAll(comments.keySet()); | |||
} | |||
@Override | |||
public void handleRecord(int id, byte[] data) throws XSSFBParseException { | |||
XSSFBRecordType recordType = XSSFBRecordType.lookup(id); | |||
switch (recordType) { | |||
case BrtBeginComment: | |||
int offset = 0; | |||
authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data)); offset += LittleEndian.INT_SIZE; | |||
cellRange = XSSFBCellRange.parse(data, offset, cellRange); | |||
offset+= XSSFBCellRange.length; | |||
//for strict parsing; confirm that firstRow==lastRow and firstCol==colLats (2.4.28) | |||
cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol); | |||
break; | |||
case BrtCommentText: | |||
XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0); | |||
comment = xssfbRichStr.getString(); | |||
break; | |||
case BrtEndComment: | |||
comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment)); | |||
authorId = -1; | |||
cellAddress = null; | |||
break; | |||
case BrtCommentAuthor: | |||
authorBuffer.setLength(0); | |||
XSSFBUtils.readXLWideString(data, 0, authorBuffer); | |||
authors.add(authorBuffer.toString()); | |||
break; | |||
} | |||
} | |||
public Queue<CellAddress> getAddresses() { | |||
return commentAddresses; | |||
} | |||
public XSSFBComment get(CellAddress cellAddress) { | |||
if (cellAddress == null) { | |||
return null; | |||
} | |||
return comments.get(cellAddress); | |||
} | |||
private final static class CellAddressComparator implements Comparator<CellAddress> { | |||
@Override | |||
public int compare(CellAddress o1, CellAddress o2) { | |||
if (o1.getRow() < o2.getRow()) { | |||
return -1; | |||
} else if (o1.getRow() > o2.getRow()) { | |||
return 1; | |||
} | |||
if (o1.getColumn() < o2.getColumn()) { | |||
return -1; | |||
} else if (o1.getColumn() > o2.getColumn()) { | |||
return 1; | |||
} | |||
return 0; | |||
} | |||
} | |||
} |
@@ -0,0 +1,75 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper; | |||
@Internal | |||
class XSSFBHeaderFooter { | |||
private final String headerFooterTypeLabel; | |||
private final boolean isHeader; | |||
private String rawString; | |||
private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper(); | |||
XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) { | |||
this.headerFooterTypeLabel = headerFooterTypeLabel; | |||
this.isHeader = isHeader; | |||
} | |||
String getHeaderFooterTypeLabel() { | |||
return headerFooterTypeLabel; | |||
} | |||
String getRawString() { | |||
return rawString; | |||
} | |||
String getString() { | |||
StringBuilder sb = new StringBuilder(); | |||
String left = headerFooterHelper.getLeftSection(rawString); | |||
String center = headerFooterHelper.getCenterSection(rawString); | |||
String right = headerFooterHelper.getRightSection(rawString); | |||
if (left != null && left.length() > 0) { | |||
sb.append(left); | |||
} | |||
if (center != null && center.length() > 0) { | |||
if (sb.length() > 0) { | |||
sb.append(" "); | |||
} | |||
sb.append(center); | |||
} | |||
if (right != null && right.length() > 0) { | |||
if (sb.length() > 0) { | |||
sb.append(" "); | |||
} | |||
sb.append(right); | |||
} | |||
return sb.toString(); | |||
} | |||
void setRawString(String rawString) { | |||
this.rawString = rawString; | |||
} | |||
boolean isHeader() { | |||
return isHeader; | |||
} | |||
} |
@@ -0,0 +1,87 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.util.Internal; | |||
@Internal | |||
class XSSFBHeaderFooters { | |||
public static XSSFBHeaderFooters parse(byte[] data) { | |||
boolean diffOddEven = false; | |||
boolean diffFirst = false; | |||
boolean scaleWDoc = false; | |||
boolean alignMargins = false; | |||
int offset = 2; | |||
XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters(); | |||
xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true); | |||
xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false); | |||
xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true); | |||
xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false); | |||
xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true); | |||
xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false); | |||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header); | |||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer); | |||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven); | |||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven); | |||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst); | |||
readHeaderFooter(data, offset, xssfbHeaderFooter.footerFirst); | |||
return xssfbHeaderFooter; | |||
} | |||
private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) { | |||
if (offset + 4 >= data.length) { | |||
return 0; | |||
} | |||
StringBuilder sb = new StringBuilder(); | |||
int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb); | |||
headerFooter.setRawString(sb.toString()); | |||
return bytesRead; | |||
} | |||
private XSSFBHeaderFooter header; | |||
private XSSFBHeaderFooter footer; | |||
private XSSFBHeaderFooter headerEven; | |||
private XSSFBHeaderFooter footerEven; | |||
private XSSFBHeaderFooter headerFirst; | |||
private XSSFBHeaderFooter footerFirst; | |||
public XSSFBHeaderFooter getHeader() { | |||
return header; | |||
} | |||
public XSSFBHeaderFooter getFooter() { | |||
return footer; | |||
} | |||
public XSSFBHeaderFooter getHeaderEven() { | |||
return headerEven; | |||
} | |||
public XSSFBHeaderFooter getFooterEven() { | |||
return footerEven; | |||
} | |||
public XSSFBHeaderFooter getHeaderFirst() { | |||
return headerFirst; | |||
} | |||
public XSSFBHeaderFooter getFooterFirst() { | |||
return footerFirst; | |||
} | |||
} |
@@ -0,0 +1,181 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.BitSet; | |||
import java.util.Comparator; | |||
import java.util.HashMap; | |||
import java.util.List; | |||
import java.util.Map; | |||
import java.util.TreeMap; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.ss.util.CellRangeAddress; | |||
import org.apache.poi.ss.util.CellRangeUtil; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
@Internal | |||
public class XSSFBHyperlinksTable { | |||
private final static BitSet RECORDS = new BitSet(); | |||
static { | |||
RECORDS.set(XSSFBRecordType.BrtHLink.getId()); | |||
} | |||
private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>(); | |||
//cache the relId to hyperlink url from the sheet's .rels | |||
private Map<String, String> relIdToHyperlink = new HashMap<String, String>(); | |||
public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException { | |||
//load the urls from the sheet .rels | |||
loadUrlsFromSheetRels(sheetPart); | |||
//now load the hyperlinks from the bottom of the sheet | |||
HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetPart.getInputStream()); | |||
scraper.parse(); | |||
} | |||
/** | |||
* | |||
* @return a map of the hyperlinks. The key is the top left cell address in their CellRange | |||
*/ | |||
public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() { | |||
Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap = | |||
new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator()); | |||
for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) { | |||
CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(), | |||
hyperlinkRecord.getCellRangeAddress().getFirstColumn()); | |||
List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress); | |||
if (list == null) { | |||
list = new ArrayList<XSSFHyperlinkRecord>(); | |||
} | |||
list.add(hyperlinkRecord); | |||
hyperlinkMap.put(cellAddress, list); | |||
} | |||
return hyperlinkMap; | |||
} | |||
/** | |||
* | |||
* @param cellAddress cell address to find | |||
* @return null if not a hyperlink | |||
*/ | |||
public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) { | |||
List<XSSFHyperlinkRecord> overlapping = null; | |||
CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(), | |||
cellAddress.getRow(), | |||
cellAddress.getColumn(), | |||
cellAddress.getColumn()); | |||
for (XSSFHyperlinkRecord record : hyperlinkRecords) { | |||
if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) { | |||
if (overlapping == null) { | |||
overlapping = new ArrayList<XSSFHyperlinkRecord>(); | |||
} | |||
overlapping.add(record); | |||
} | |||
} | |||
return overlapping; | |||
} | |||
private void loadUrlsFromSheetRels(PackagePart sheetPart) { | |||
try { | |||
for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) { | |||
relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString()); | |||
} | |||
} catch (InvalidFormatException e) { | |||
//swallow | |||
} | |||
} | |||
private class HyperlinkSheetScraper extends XSSFBParser { | |||
private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange(); | |||
private final StringBuilder xlWideStringBuffer = new StringBuilder(); | |||
HyperlinkSheetScraper(InputStream is) { | |||
super(is, RECORDS); | |||
} | |||
@Override | |||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||
if (recordType != XSSFBRecordType.BrtHLink.getId()) { | |||
return; | |||
} | |||
int offset = 0; | |||
String relId = ""; | |||
String location = ""; | |||
String toolTip = ""; | |||
String display = ""; | |||
hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange); | |||
offset += XSSFBCellRange.length; | |||
xlWideStringBuffer.setLength(0); | |||
offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer); | |||
relId = xlWideStringBuffer.toString(); | |||
xlWideStringBuffer.setLength(0); | |||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||
location = xlWideStringBuffer.toString(); | |||
xlWideStringBuffer.setLength(0); | |||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||
toolTip = xlWideStringBuffer.toString(); | |||
xlWideStringBuffer.setLength(0); | |||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||
display = xlWideStringBuffer.toString(); | |||
CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol); | |||
String url = relIdToHyperlink.get(relId); | |||
if (location == null || location.length() == 0) { | |||
location = url; | |||
} | |||
hyperlinkRecords.add( | |||
new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display) | |||
); | |||
} | |||
} | |||
private static class TopLeftCellAddressComparator implements Comparator<CellAddress> { | |||
@Override | |||
public int compare(CellAddress o1, CellAddress o2) { | |||
if (o1.getRow() < o2.getRow()) { | |||
return -1; | |||
} else if (o1.getRow() > o2.getRow()) { | |||
return 1; | |||
} | |||
if (o1.getColumn() < o2.getColumn()) { | |||
return -1; | |||
} else if (o1.getColumn() > o2.getColumn()) { | |||
return 1; | |||
} | |||
return 0; | |||
} | |||
} | |||
} |
@@ -0,0 +1,28 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
/**
 * Unchecked exception raised when an xssfb stream cannot be parsed.
 */
public class XSSFBParseException extends RuntimeException {

    /**
     * Creates the exception with a detail message and no cause.
     *
     * @param msg description of the parse failure
     */
    public XSSFBParseException(final String msg) {
        super(msg);
    }
}
@@ -0,0 +1,105 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.BitSet; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndianInputStream; | |||
/** | |||
* Experimental parser for Microsoft's ooxml xssfb format. | |||
* Not thread safe, obviously. Need to create a new one | |||
* for each thread. | |||
*/ | |||
@Internal | |||
public abstract class XSSFBParser { | |||
private final LittleEndianInputStream is; | |||
private final BitSet records; | |||
public XSSFBParser(InputStream is) { | |||
this.is = new LittleEndianInputStream(is); | |||
records = null; | |||
} | |||
XSSFBParser(InputStream is, BitSet bitSet) { | |||
this.is = new LittleEndianInputStream(is); | |||
records = bitSet; | |||
} | |||
public void parse() throws IOException { | |||
while (true) { | |||
int bInt = is.read(); | |||
if (bInt == -1) { | |||
return; | |||
} | |||
readNext((byte) bInt); | |||
} | |||
} | |||
private void readNext(byte b1) throws IOException { | |||
int recordId = 0; | |||
//if highest bit == 1 | |||
if ((b1 >> 7 & 1) == 1) { | |||
byte b2 = is.readByte(); | |||
b1 &= ~(1<<7); //unset highest bit | |||
b2 &= ~(1<<7); //unset highest bit (if it exists?) | |||
recordId = (128*(int)b2)+(int)b1; | |||
} else { | |||
recordId = (int)b1; | |||
} | |||
long recordLength = 0; | |||
int i = 0; | |||
boolean halt = false; | |||
while (i < 4 && ! halt) { | |||
byte b = is.readByte(); | |||
halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue | |||
b &= ~(1<<7); | |||
recordLength += (int)b << (i*7); //multiply by 128^i | |||
i++; | |||
} | |||
if (records == null || records.get(recordId)) { | |||
//add sanity check for length? | |||
byte[] buff = new byte[(int) recordLength]; | |||
is.readFully(buff); | |||
handleRecord(recordId, buff); | |||
} else { | |||
long length = is.skip(recordLength); | |||
if (length != recordLength) { | |||
throw new XSSFBParseException("End of file reached before expected.\t"+ | |||
"Tried to skip "+recordLength + ", but only skipped "+length); | |||
} | |||
} | |||
} | |||
//It hurts, hurts, hurts to create a new byte array for every record. | |||
//However, on a large Excel spreadsheet, this parser was 1/3 faster than | |||
//the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf. | |||
//The code is far cleaner to have the parser read all | |||
//of the data rather than having every component promise that it read | |||
//the correct amount. | |||
abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException; | |||
} |
@@ -0,0 +1,92 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.util.Internal; | |||
@Internal | |||
/**
 * Record types of the xlsb binary format that this package knows about, each
 * paired with its numeric record id. Ids that are not listed here resolve to
 * {@link #Unimplemented} via {@link #lookup(int)}.
 */
public enum XSSFBRecordType {

    BrtCellBlank(1),
    BrtCellRk(2),
    BrtCellError(3),
    BrtCellBool(4),
    BrtCellReal(5),
    BrtCellSt(6),
    BrtCellIsst(7),
    BrtFmlaString(8),
    BrtFmlaNum(9),
    BrtFmlaBool(10),
    BrtFmlaError(11),
    BrtRowHdr(0),
    BrtCellRString(62),
    BrtBeginSheet(129),
    BrtWsProp(147),
    BrtWsDim(148),
    BrtColInfo(60),
    BrtBeginSheetData(145),
    BrtEndSheetData(146),
    BrtHLink(494),
    BrtBeginHeaderFooter(479),

    //comments
    BrtBeginCommentAuthors(630),
    BrtEndCommentAuthors(631),
    BrtCommentAuthor(632),
    BrtBeginComment(635),
    BrtCommentText(637),
    BrtEndComment(636),

    //styles table
    BrtXf(47),
    BrtFmt(44),
    BrtBeginFmts(615),
    BrtEndFmts(616),
    BrtBeginCellXFs(617),
    BrtEndCellXFs(618),
    BrtBeginCellStyleXFS(626),
    BrtEndCellStyleXFS(627),

    //stored strings table
    BrtSstItem(19), //stored strings items
    BrtBeginSst(159), //stored strings begin sst
    BrtEndSst(160), //stored strings end sst

    BrtBundleSh(156), //defines worksheet in wb part

    Unimplemented(-1);

    /**
     * Id to type index, built once. lookup is called for every record parsed, so
     * this avoids cloning values() and scanning it linearly on each call.
     */
    private static final java.util.Map<Integer, XSSFBRecordType> TYPE_MAP =
            new java.util.HashMap<Integer, XSSFBRecordType>();

    static {
        for (XSSFBRecordType type : XSSFBRecordType.values()) {
            TYPE_MAP.put(type.getId(), type);
        }
    }

    private final int id;

    XSSFBRecordType(int id) {
        this.id = id;
    }

    /**
     * @return the numeric record id of this type ({@code -1} for {@link #Unimplemented})
     */
    public int getId() {
        return id;
    }

    /**
     * Resolves a numeric record id to its type.
     *
     * @param id record id as read from the stream
     * @return the matching type, or {@link #Unimplemented} if the id is not known
     */
    public static XSSFBRecordType lookup(int id) {
        XSSFBRecordType type = TYPE_MAP.get(id);
        return (type == null) ? Unimplemented : type;
    }
}
@@ -0,0 +1,85 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.Iterator; | |||
import org.apache.poi.POIXMLDocumentPart; | |||
import org.apache.poi.POIXMLRelation; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackagePartName; | |||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; | |||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
/** | |||
* Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation} | |||
* because of conflicts with regular ooxml relations. | |||
* If we failed to break this into a separate class, in the cases of SharedStrings and Styles, | |||
* 2 parts would exist, and "Packages shall not contain equivalent part names..." | |||
* <p> | |||
* Also, we need to avoid the possibility of breaking the marshalling process for xml. | |||
*/ | |||
@Internal | |||
public class XSSFBRelation extends POIXMLRelation { | |||
private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class); | |||
static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation( | |||
"application/vnd.ms-excel.sharedStrings", | |||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings", | |||
"/xl/sharedStrings.bin", | |||
null | |||
); | |||
public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation( | |||
"application/vnd.ms-excel.styles", | |||
PackageRelationshipTypes.STYLE_PART, | |||
"/xl/styles.bin", | |||
null | |||
); | |||
private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) { | |||
super(type, rel, defaultName, cls); | |||
} | |||
/** | |||
* Fetches the InputStream to read the contents, based | |||
* of the specified core part, for which we are defined | |||
* as a suitable relationship | |||
*/ | |||
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { | |||
PackageRelationshipCollection prc = | |||
corePart.getRelationshipsByType(getRelation()); | |||
Iterator<PackageRelationship> it = prc.iterator(); | |||
if (it.hasNext()) { | |||
PackageRelationship rel = it.next(); | |||
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); | |||
PackagePart part = corePart.getPackage().getPart(relName); | |||
return part.getInputStream(); | |||
} | |||
log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); | |||
return null; | |||
} | |||
} |
@@ -0,0 +1,47 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.util.Internal; | |||
@Internal | |||
class XSSFBRichStr { | |||
public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException { | |||
byte first = bytes[offset]; | |||
boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1? | |||
boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1? | |||
StringBuilder sb = new StringBuilder(); | |||
int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb); | |||
//TODO: parse phonetic strings. | |||
return new XSSFBRichStr(sb.toString(), ""); | |||
} | |||
private final String string; | |||
private final String phoneticString; | |||
XSSFBRichStr(String string, String phoneticString) { | |||
this.string = string; | |||
this.phoneticString = phoneticString; | |||
} | |||
public String getString() { | |||
return string; | |||
} | |||
} |
@@ -0,0 +1,80 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.ss.usermodel.Font; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |||
/** | |||
* Wrapper class around String so that we can use it in Comment. | |||
* Nothing has been implemented yet except for {@link #getString()}. | |||
*/ | |||
@Internal | |||
class XSSFBRichTextString extends XSSFRichTextString { | |||
private final String string; | |||
XSSFBRichTextString(String string) { | |||
this.string = string; | |||
} | |||
@Override | |||
public void applyFont(int startIndex, int endIndex, short fontIndex) { | |||
} | |||
@Override | |||
public void applyFont(int startIndex, int endIndex, Font font) { | |||
} | |||
@Override | |||
public void applyFont(Font font) { | |||
} | |||
@Override | |||
public void clearFormatting() { | |||
} | |||
@Override | |||
public String getString() { | |||
return string; | |||
} | |||
@Override | |||
public int length() { | |||
return string.length(); | |||
} | |||
@Override | |||
public int numFormattingRuns() { | |||
return 0; | |||
} | |||
@Override | |||
public int getIndexOfFormattingRun(int index) { | |||
return 0; | |||
} | |||
@Override | |||
public void applyFont(short fontIndex) { | |||
} | |||
} |
@@ -0,0 +1,137 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.List; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.xml.sax.SAXException; | |||
@Internal | |||
public class XSSFBSharedStringsTable { | |||
/** | |||
* An integer representing the total count of strings in the workbook. This count does not | |||
* include any numbers, it counts only the total of text strings in the workbook. | |||
*/ | |||
private int count; | |||
/** | |||
* An integer representing the total count of unique strings in the Shared String Table. | |||
* A string is unique even if it is a copy of another string, but has different formatting applied | |||
* at the character level. | |||
*/ | |||
private int uniqueCount; | |||
/** | |||
* The shared strings table. | |||
*/ | |||
private List<String> strings = new ArrayList<String>(); | |||
/** | |||
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table. | |||
* @throws IOException If reading the data from the package fails. | |||
* @throws SAXException if parsing the XML data fails. | |||
*/ | |||
public XSSFBSharedStringsTable(OPCPackage pkg) | |||
throws IOException, SAXException { | |||
ArrayList<PackagePart> parts = | |||
pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType()); | |||
// Some workbooks have no shared strings table. | |||
if (parts.size() > 0) { | |||
PackagePart sstPart = parts.get(0); | |||
readFrom(sstPart.getInputStream()); | |||
} | |||
} | |||
/** | |||
* Like POIXMLDocumentPart constructor | |||
* | |||
* @since POI 3.14-Beta3 | |||
*/ | |||
XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException { | |||
readFrom(part.getInputStream()); | |||
} | |||
private void readFrom(InputStream inputStream) throws IOException { | |||
SSTBinaryReader reader = new SSTBinaryReader(inputStream); | |||
reader.parse(); | |||
} | |||
public List<String> getItems() { | |||
return strings; | |||
} | |||
public String getEntryAt(int i) { | |||
return strings.get(i); | |||
} | |||
/** | |||
* Return an integer representing the total count of strings in the workbook. This count does not | |||
* include any numbers, it counts only the total of text strings in the workbook. | |||
* | |||
* @return the total count of strings in the workbook | |||
*/ | |||
public int getCount() { | |||
return this.count; | |||
} | |||
/** | |||
* Returns an integer representing the total count of unique strings in the Shared String Table. | |||
* A string is unique even if it is a copy of another string, but has different formatting applied | |||
* at the character level. | |||
* | |||
* @return the total count of unique strings in the workbook | |||
*/ | |||
public int getUniqueCount() { | |||
return this.uniqueCount; | |||
} | |||
private class SSTBinaryReader extends XSSFBParser { | |||
SSTBinaryReader(InputStream is) { | |||
super(is); | |||
} | |||
@Override | |||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType); | |||
switch (type) { | |||
case BrtSstItem: | |||
XSSFBRichStr rstr = XSSFBRichStr.build(data, 0); | |||
strings.add(rstr.getString()); | |||
break; | |||
case BrtBeginSst: | |||
count = (int) LittleEndian.getUInt(data,0); | |||
uniqueCount = (int) LittleEndian.getUInt(data, 4); | |||
break; | |||
} | |||
} | |||
} | |||
} |
@@ -0,0 +1,329 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.InputStream; | |||
import java.util.Queue; | |||
import org.apache.poi.ss.usermodel.DataFormatter; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; | |||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |||
@Internal | |||
public class XSSFBSheetHandler extends XSSFBParser { | |||
private final static int CHECK_ALL_ROWS = -1; | |||
private final XSSFBSharedStringsTable stringsTable; | |||
private final XSSFSheetXMLHandler.SheetContentsHandler handler; | |||
private final XSSFBStylesTable styles; | |||
private final XSSFBCommentsTable comments; | |||
private final DataFormatter dataFormatter; | |||
private final boolean formulasNotResults;//TODO: implement this | |||
private int lastEndedRow = -1; | |||
private int lastStartedRow = -1; | |||
private int currentRow = 0; | |||
private byte[] rkBuffer = new byte[8]; | |||
private XSSFBCellRange hyperlinkCellRange = null; | |||
private StringBuilder xlWideStringBuffer = new StringBuilder(); | |||
private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader(); | |||
public XSSFBSheetHandler(InputStream is, | |||
XSSFBStylesTable styles, | |||
XSSFBCommentsTable comments, | |||
XSSFBSharedStringsTable strings, | |||
XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, | |||
DataFormatter dataFormatter, | |||
boolean formulasNotResults) { | |||
super(is); | |||
this.styles = styles; | |||
this.comments = comments; | |||
this.stringsTable = strings; | |||
this.handler = sheetContentsHandler; | |||
this.dataFormatter = dataFormatter; | |||
this.formulasNotResults = formulasNotResults; | |||
} | |||
@Override | |||
public void handleRecord(int id, byte[] data) throws XSSFBParseException { | |||
XSSFBRecordType type = XSSFBRecordType.lookup(id); | |||
switch(type) { | |||
case BrtRowHdr: | |||
long rw = LittleEndian.getUInt(data, 0); | |||
if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec? | |||
throw new XSSFBParseException("Row number beyond allowable range: "+rw); | |||
} | |||
currentRow = (int)rw; | |||
checkMissedComments(currentRow); | |||
startRow(currentRow); | |||
break; | |||
case BrtCellIsst: | |||
handleBrtCellIsst(data); | |||
break; | |||
case BrtCellSt: //TODO: needs test | |||
handleCellSt(data); | |||
break; | |||
case BrtCellRk: | |||
handleCellRk(data); | |||
break; | |||
case BrtCellReal: | |||
handleCellReal(data); | |||
break; | |||
case BrtCellBool: | |||
handleBoolean(data); | |||
break; | |||
case BrtCellError: | |||
handleCellError(data); | |||
break; | |||
case BrtCellBlank: | |||
beforeCellValue(data);//read cell info and check for missing comments | |||
break; | |||
case BrtFmlaString: | |||
handleFmlaString(data); | |||
break; | |||
case BrtFmlaNum: | |||
handleFmlaNum(data); | |||
break; | |||
case BrtFmlaError: | |||
handleFmlaError(data); | |||
break; | |||
//TODO: All the PCDI and PCDIA | |||
case BrtEndSheetData: | |||
checkMissedComments(CHECK_ALL_ROWS); | |||
endRow(lastStartedRow); | |||
break; | |||
case BrtBeginHeaderFooter: | |||
handleHeaderFooter(data); | |||
break; | |||
} | |||
} | |||
private void beforeCellValue(byte[] data) { | |||
XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer); | |||
checkMissedComments(currentRow, cellBuffer.getColNum()); | |||
} | |||
private void handleCellValue(String formattedValue) { | |||
CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum()); | |||
XSSFBComment comment = null; | |||
if (comments != null) { | |||
comment = comments.get(cellAddress); | |||
} | |||
handler.cell(cellAddress.formatAsString(), formattedValue, comment); | |||
} | |||
private void handleFmlaNum(byte[] data) { | |||
beforeCellValue(data); | |||
//xNum | |||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); | |||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||
handleCellValue(formattedVal); | |||
} | |||
private void handleCellSt(byte[] data) { | |||
beforeCellValue(data); | |||
xlWideStringBuffer.setLength(0); | |||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); | |||
handleCellValue(xlWideStringBuffer.toString()); | |||
} | |||
private void handleFmlaString(byte[] data) { | |||
beforeCellValue(data); | |||
xlWideStringBuffer.setLength(0); | |||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); | |||
handleCellValue(xlWideStringBuffer.toString()); | |||
} | |||
private void handleCellError(byte[] data) { | |||
beforeCellValue(data); | |||
//TODO, read byte to figure out the type of error | |||
handleCellValue("ERROR"); | |||
} | |||
private void handleFmlaError(byte[] data) { | |||
beforeCellValue(data); | |||
//TODO, read byte to figure out the type of error | |||
handleCellValue("ERROR"); | |||
} | |||
private void handleBoolean(byte[] data) { | |||
beforeCellValue(data); | |||
String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE"; | |||
handleCellValue(formattedVal); | |||
} | |||
private void handleCellReal(byte[] data) { | |||
beforeCellValue(data); | |||
//xNum | |||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); | |||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||
handleCellValue(formattedVal); | |||
} | |||
private void handleCellRk(byte[] data) { | |||
beforeCellValue(data); | |||
double val = rkNumber(data, XSSFBCellHeader.length); | |||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||
handleCellValue(formattedVal); | |||
} | |||
private void handleBrtCellIsst(byte[] data) { | |||
beforeCellValue(data); | |||
long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length); | |||
//check for out of range, buffer overflow | |||
XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx)); | |||
handleCellValue(rtss.getString()); | |||
} | |||
private void handleHeaderFooter(byte[] data) { | |||
XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data); | |||
outputHeaderFooter(headerFooter.getHeader()); | |||
outputHeaderFooter(headerFooter.getFooter()); | |||
outputHeaderFooter(headerFooter.getHeaderEven()); | |||
outputHeaderFooter(headerFooter.getFooterEven()); | |||
outputHeaderFooter(headerFooter.getHeaderFirst()); | |||
outputHeaderFooter(headerFooter.getFooterFirst()); | |||
} | |||
private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) { | |||
String text = headerFooter.getString(); | |||
if (text != null && text.trim().length() > 0) { | |||
handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel()); | |||
} | |||
} | |||
//at start of next cell or end of row, return the cellAddress if it equals currentRow and col | |||
private void checkMissedComments(int currentRow, int colNum) { | |||
if (comments == null) { | |||
return; | |||
} | |||
Queue<CellAddress> queue = comments.getAddresses(); | |||
while (queue.size() > 0) { | |||
CellAddress cellAddress = queue.peek(); | |||
if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) { | |||
cellAddress = queue.remove(); | |||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); | |||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) { | |||
queue.remove(); | |||
return; | |||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) { | |||
return; | |||
} else if (cellAddress.getRow() > currentRow) { | |||
return; | |||
} | |||
} | |||
} | |||
//check for anything from rows before | |||
private void checkMissedComments(int currentRow) { | |||
if (comments == null) { | |||
return; | |||
} | |||
Queue<CellAddress> queue = comments.getAddresses(); | |||
int lastInterpolatedRow = -1; | |||
while (queue.size() > 0) { | |||
CellAddress cellAddress = queue.peek(); | |||
if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) { | |||
cellAddress = queue.remove(); | |||
if (cellAddress.getRow() != lastInterpolatedRow) { | |||
startRow(cellAddress.getRow()); | |||
} | |||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); | |||
lastInterpolatedRow = cellAddress.getRow(); | |||
} else { | |||
break; | |||
} | |||
} | |||
} | |||
private void startRow(int row) { | |||
if (row == lastStartedRow) { | |||
return; | |||
} | |||
if (lastStartedRow != lastEndedRow) { | |||
endRow(lastStartedRow); | |||
} | |||
handler.startRow(row); | |||
lastStartedRow = row; | |||
} | |||
private void endRow(int row) { | |||
if (lastEndedRow == row) { | |||
return; | |||
} | |||
handler.endRow(row); | |||
lastEndedRow = row; | |||
} | |||
private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) { | |||
handler.cell(cellAddress.formatAsString(), null, comment); | |||
} | |||
private double rkNumber(byte[] data, int offset) { | |||
//see 2.5.122 for this abomination | |||
byte b0 = data[offset]; | |||
String s = Integer.toString(b0, 2); | |||
boolean numDivBy100 = ((b0 & 1) == 1); // else as is | |||
boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer | |||
//unset highest 2 bits | |||
b0 &= ~1; | |||
b0 &= ~(1<<1); | |||
rkBuffer[4] = b0; | |||
for (int i = 1; i < 4; i++) { | |||
rkBuffer[i+4] = data[offset+i]; | |||
} | |||
double d = 0.0; | |||
if (floatingPoint) { | |||
d = LittleEndian.getDouble(rkBuffer); | |||
} else { | |||
d = LittleEndian.getInt(rkBuffer); | |||
} | |||
d = (numDivBy100) ? d/100 : d; | |||
return d; | |||
} | |||
/** | |||
* You need to implement this to handle the results | |||
* of the sheet parsing. | |||
*/ | |||
public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler { | |||
/** | |||
* A cell, with the given formatted value (may be null), | |||
* a url (may be null), a toolTip (may be null) | |||
* and possibly a comment (may be null), was encountered */ | |||
void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment); | |||
} | |||
} |
@@ -0,0 +1,101 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.List; | |||
import java.util.SortedMap; | |||
import java.util.TreeMap; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.ss.usermodel.BuiltinFormats; | |||
import org.apache.poi.util.Internal; | |||
@Internal | |||
public class XSSFBStylesTable extends XSSFBParser { | |||
private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>(); | |||
private final List<Short> styleIds = new ArrayList<Short>(); | |||
private boolean inCellXFS = false; | |||
private boolean inFmts = false; | |||
public XSSFBStylesTable(InputStream is) throws IOException { | |||
super(is); | |||
parse(); | |||
} | |||
String getNumberFormatString(int idx) { | |||
if (numberFormats.containsKey(styleIds.get((short)idx))) { | |||
return numberFormats.get(styleIds.get((short)idx)); | |||
} | |||
return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx)); | |||
} | |||
@Override | |||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType); | |||
switch (type) { | |||
case BrtBeginCellXFs: | |||
inCellXFS = true; | |||
break; | |||
case BrtEndCellXFs: | |||
inCellXFS = false; | |||
break; | |||
case BrtXf: | |||
if (inCellXFS) { | |||
handleBrtXFInCellXF(data); | |||
} | |||
break; | |||
case BrtBeginFmts: | |||
inFmts = true; | |||
break; | |||
case BrtEndFmts: | |||
inFmts = false; | |||
break; | |||
case BrtFmt: | |||
if (inFmts) { | |||
handleFormat(data); | |||
} | |||
break; | |||
} | |||
} | |||
private void handleFormat(byte[] data) { | |||
int ifmt = data[0] & 0xFF; | |||
if (ifmt > Short.MAX_VALUE) { | |||
throw new POIXMLException("Format id must be a short"); | |||
} | |||
StringBuilder sb = new StringBuilder(); | |||
XSSFBUtils.readXLWideString(data, 2, sb); | |||
String fmt = sb.toString(); | |||
numberFormats.put((short)ifmt, fmt); | |||
} | |||
private void handleBrtXFInCellXF(byte[] data) { | |||
int ifmtOffset = 2; | |||
//int ifmtLength = 2; | |||
//numFmtId in xml terms | |||
int ifmt = data[ifmtOffset] & 0xFF;//the second byte is ignored | |||
styleIds.add((short)ifmt); | |||
} | |||
} |
@@ -0,0 +1,108 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import java.nio.charset.Charset; | |||
import org.apache.poi.POIXMLException; | |||
import org.apache.poi.util.Internal; | |||
import org.apache.poi.util.LittleEndian; | |||
@Internal | |||
public class XSSFBUtils { | |||
/** | |||
* Reads an XLNullableWideString. | |||
* @param data data from which to read | |||
* @param offset in data from which to start | |||
* @param sb buffer to which to write. You must setLength(0) before calling! | |||
* @return number of bytes read | |||
* @throws XSSFBParseException if there was an exception during reading | |||
*/ | |||
static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { | |||
long numChars = LittleEndian.getUInt(data, offset); | |||
if (numChars < 0) { | |||
throw new XSSFBParseException("too few chars to read"); | |||
} else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!! | |||
return 0; | |||
} else if (numChars > 0xFFFFFFFFL) { | |||
throw new XSSFBParseException("too many chars to read"); | |||
} | |||
int numBytes = 2*(int)numChars; | |||
offset += 4; | |||
if (offset+numBytes > data.length) { | |||
throw new XSSFBParseException("trying to read beyond data length:" + | |||
"offset="+offset+", numBytes="+numBytes+", data.length="+data.length); | |||
} | |||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); | |||
numBytes+=4; | |||
return numBytes; | |||
} | |||
/** | |||
* Reads an XLNullableWideString. | |||
* @param data data from which to read | |||
* @param offset in data from which to start | |||
* @param sb buffer to which to write. You must setLength(0) before calling! | |||
* @return number of bytes read | |||
* @throws XSSFBParseException if there was an exception while trying to read the string | |||
*/ | |||
public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { | |||
long numChars = LittleEndian.getUInt(data, offset); | |||
if (numChars < 0) { | |||
throw new XSSFBParseException("too few chars to read"); | |||
} else if (numChars > 0xFFFFFFFFL) { | |||
throw new XSSFBParseException("too many chars to read"); | |||
} | |||
int numBytes = 2*(int)numChars; | |||
offset += 4; | |||
if (offset+numBytes > data.length) { | |||
throw new XSSFBParseException("trying to read beyond data length"); | |||
} | |||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); | |||
numBytes+=4; | |||
return numBytes; | |||
} | |||
static int castToInt(long val) { | |||
if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) { | |||
return (int)val; | |||
} | |||
throw new POIXMLException("val ("+val+") can't be cast to int"); | |||
} | |||
static short castToShort(int val) { | |||
if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) { | |||
return (short)val; | |||
} | |||
throw new POIXMLException("val ("+val+") can't be cast to short"); | |||
} | |||
//TODO: move to LittleEndian? | |||
static int get24BitInt( byte[] data, int offset) { | |||
int i = offset; | |||
int b0 = data[i++] & 0xFF; | |||
int b1 = data[i++] & 0xFF; | |||
int b2 = data[i] & 0xFF; | |||
return ( b2 << 16 ) + ( b1 << 8 ) + b0; | |||
} | |||
} |
@@ -0,0 +1,117 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import org.apache.poi.ss.util.CellRangeAddress; | |||
import org.apache.poi.util.Internal; | |||
/** | |||
* This is a read only record that maintains information about | |||
* a hyperlink. In OOXML land, this information has to be merged | |||
* from 1) the sheet's .rels to get the url and 2) from after the | |||
* sheet data in they hyperlink section. | |||
* | |||
* The {@link #display} is often empty and should be filled from | |||
* the contents of the anchor cell. | |||
* | |||
*/ | |||
@Internal | |||
public class XSSFHyperlinkRecord { | |||
private final CellRangeAddress cellRangeAddress; | |||
private final String relId; | |||
private String location; | |||
private String toolTip; | |||
private String display; | |||
XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) { | |||
this.cellRangeAddress = cellRangeAddress; | |||
this.relId = relId; | |||
this.location = location; | |||
this.toolTip = toolTip; | |||
this.display = display; | |||
} | |||
void setLocation(String location) { | |||
this.location = location; | |||
} | |||
void setToolTip(String toolTip) { | |||
this.toolTip = toolTip; | |||
} | |||
void setDisplay(String display) { | |||
this.display = display; | |||
} | |||
CellRangeAddress getCellRangeAddress() { | |||
return cellRangeAddress; | |||
} | |||
public String getRelId() { | |||
return relId; | |||
} | |||
public String getLocation() { | |||
return location; | |||
} | |||
public String getToolTip() { | |||
return toolTip; | |||
} | |||
public String getDisplay() { | |||
return display; | |||
} | |||
@Override | |||
public boolean equals(Object o) { | |||
if (this == o) return true; | |||
if (o == null || getClass() != o.getClass()) return false; | |||
XSSFHyperlinkRecord that = (XSSFHyperlinkRecord) o; | |||
if (cellRangeAddress != null ? !cellRangeAddress.equals(that.cellRangeAddress) : that.cellRangeAddress != null) | |||
return false; | |||
if (relId != null ? !relId.equals(that.relId) : that.relId != null) return false; | |||
if (location != null ? !location.equals(that.location) : that.location != null) return false; | |||
if (toolTip != null ? !toolTip.equals(that.toolTip) : that.toolTip != null) return false; | |||
return display != null ? display.equals(that.display) : that.display == null; | |||
} | |||
@Override | |||
public int hashCode() { | |||
int result = cellRangeAddress != null ? cellRangeAddress.hashCode() : 0; | |||
result = 31 * result + (relId != null ? relId.hashCode() : 0); | |||
result = 31 * result + (location != null ? location.hashCode() : 0); | |||
result = 31 * result + (toolTip != null ? toolTip.hashCode() : 0); | |||
result = 31 * result + (display != null ? display.hashCode() : 0); | |||
return result; | |||
} | |||
@Override | |||
public String toString() { | |||
return "XSSFHyperlinkRecord{" + | |||
"cellRangeAddress=" + cellRangeAddress + | |||
", relId='" + relId + '\'' + | |||
", location='" + location + '\'' + | |||
", toolTip='" + toolTip + '\'' + | |||
", display='" + display + '\'' + | |||
'}'; | |||
} | |||
} |
@@ -0,0 +1,44 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> | |||
<!-- | |||
==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== | |||
--> | |||
<html> | |||
<head> | |||
</head> | |||
<body bgcolor="white"> | |||
<p>The org.apache.poi.xssf.binary package includes necessary underlying components | |||
for streaming/read-only processing of xlsb files. | |||
</p> | |||
<p> | |||
POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader | |||
in o.a.p.xssf.eventusermodel. | |||
</p> | |||
<p> | |||
This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes | |||
have been marked @Internal and the API is subject to change. | |||
</p> | |||
<h2>Related Documentation</h2> | |||
For overviews, tutorials, examples, guides, and tool documentation, please see: | |||
<ul> | |||
<li><a href="http://poi.apache.org">Apache POI Project</a> | |||
</ul> | |||
</body> | |||
</html> |
@@ -0,0 +1,172 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.eventusermodel; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.Iterator; | |||
import java.util.LinkedList; | |||
import java.util.List; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.apache.poi.openxml4j.opc.PackagePartName; | |||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; | |||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||
import org.apache.poi.util.LittleEndian; | |||
import org.apache.poi.xssf.binary.XSSFBCommentsTable; | |||
import org.apache.poi.xssf.binary.XSSFBParseException; | |||
import org.apache.poi.xssf.binary.XSSFBParser; | |||
import org.apache.poi.xssf.binary.XSSFBRecordType; | |||
import org.apache.poi.xssf.binary.XSSFBRelation; | |||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||
import org.apache.poi.xssf.binary.XSSFBUtils; | |||
import org.apache.poi.xssf.model.CommentsTable; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
/** | |||
* Reader for xlsb files. | |||
*/ | |||
public class XSSFBReader extends XSSFReader { | |||
/** | |||
* Creates a new XSSFReader, for the given package | |||
* | |||
* @param pkg opc package | |||
*/ | |||
public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException { | |||
super(pkg); | |||
} | |||
/** | |||
* Returns an Iterator which will let you get at all the | |||
* different Sheets in turn. | |||
* Each sheet's InputStream is only opened when fetched | |||
* from the Iterator. It's up to you to close the | |||
* InputStreams when done with each one. | |||
*/ | |||
@Override | |||
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException { | |||
return new SheetIterator(workbookPart); | |||
} | |||
public XSSFBStylesTable getXSSFBStylesTable() throws IOException { | |||
ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType()); | |||
if(parts.size() == 0) return null; | |||
// Create the Styles Table, and associate the Themes if present | |||
return new XSSFBStylesTable(parts.get(0).getInputStream()); | |||
} | |||
public static class SheetIterator extends XSSFReader.SheetIterator { | |||
/** | |||
* Construct a new SheetIterator | |||
* | |||
* @param wb package part holding workbook.xml | |||
*/ | |||
private SheetIterator(PackagePart wb) throws IOException { | |||
super(wb); | |||
} | |||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException { | |||
SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream()); | |||
sheetRefLoader.parse(); | |||
return sheetRefLoader.getSheets().iterator(); | |||
} | |||
/** | |||
* Not supported by XSSFBReader's SheetIterator. | |||
* Please use {@link #getXSSFBSheetComments()} instead. | |||
* @return nothing, always throws IllegalArgumentException! | |||
*/ | |||
@Override | |||
public CommentsTable getSheetComments() { | |||
throw new IllegalArgumentException("Please use getXSSFBSheetComments"); | |||
} | |||
public XSSFBCommentsTable getXSSFBSheetComments() { | |||
PackagePart sheetPkg = getSheetPart(); | |||
// Do we have a comments relationship? (Only ever one if so) | |||
try { | |||
PackageRelationshipCollection commentsList = | |||
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation()); | |||
if (commentsList.size() > 0) { | |||
PackageRelationship comments = commentsList.getRelationship(0); | |||
if (comments == null || comments.getTargetURI() == null) { | |||
return null; | |||
} | |||
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI()); | |||
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName); | |||
return new XSSFBCommentsTable(commentsPart.getInputStream()); | |||
} | |||
} catch (InvalidFormatException e) { | |||
return null; | |||
} catch (IOException e) { | |||
return null; | |||
} | |||
return null; | |||
} | |||
} | |||
private static class SheetRefLoader extends XSSFBParser { | |||
List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>(); | |||
private SheetRefLoader(InputStream is) { | |||
super(is); | |||
} | |||
@Override | |||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||
if (recordType == XSSFBRecordType.BrtBundleSh.getId()) { | |||
addWorksheet(data); | |||
} | |||
} | |||
private void addWorksheet(byte[] data) { | |||
int offset = 0; | |||
//this is the sheet state #2.5.142 | |||
long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||
long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||
//according to #2.4.304 | |||
if (iTabID < 1 || iTabID > 0x0000FFFFL) { | |||
throw new XSSFBParseException("table id out of range: "+iTabID); | |||
} | |||
StringBuilder sb = new StringBuilder(); | |||
offset += XSSFBUtils.readXLWideString(data, offset, sb); | |||
String relId = sb.toString(); | |||
sb.setLength(0); | |||
XSSFBUtils.readXLWideString(data, offset, sb); | |||
String name = sb.toString(); | |||
if (relId != null && relId.trim().length() > 0) { | |||
sheets.add(new XSSFSheetRef(relId, name)); | |||
} | |||
} | |||
List<XSSFSheetRef> getSheets() { | |||
return sheets; | |||
} | |||
} | |||
} |
@@ -16,15 +16,16 @@ | |||
==================================================================== */ | |||
package org.apache.poi.xssf.eventusermodel; | |||
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; | |||
import javax.xml.parsers.ParserConfigurationException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.Collections; | |||
import java.util.HashMap; | |||
import java.util.Iterator; | |||
import java.util.LinkedList; | |||
import java.util.List; | |||
import java.util.Locale; | |||
import java.util.Map; | |||
import org.apache.poi.POIXMLException; | |||
@@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||
import org.apache.poi.util.POILogFactory; | |||
import org.apache.poi.util.POILogger; | |||
import org.apache.poi.util.SAXHelper; | |||
import org.apache.poi.xssf.model.CommentsTable; | |||
import org.apache.poi.xssf.model.SharedStringsTable; | |||
import org.apache.poi.xssf.model.StylesTable; | |||
@@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSSFDrawing; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; | |||
import org.xml.sax.Attributes; | |||
import org.xml.sax.InputSource; | |||
import org.xml.sax.SAXException; | |||
import org.xml.sax.XMLReader; | |||
import org.xml.sax.helpers.DefaultHandler; | |||
/** | |||
* This class makes it easy to get at individual parts | |||
@@ -62,8 +66,8 @@ public class XSSFReader { | |||
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class); | |||
private OPCPackage pkg; | |||
private PackagePart workbookPart; | |||
protected OPCPackage pkg; | |||
protected PackagePart workbookPart; | |||
/** | |||
* Creates a new XSSFReader, for the given package | |||
@@ -194,23 +198,23 @@ public class XSSFReader { | |||
private final Map<String, PackagePart> sheetMap; | |||
/** | |||
* Current CTSheet bean | |||
* Current sheet reference | |||
*/ | |||
private CTSheet ctSheet; | |||
XSSFSheetRef xssfSheetRef; | |||
/** | |||
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order. | |||
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, | |||
* i.e. as they are stored in the underlying package | |||
*/ | |||
private final Iterator<CTSheet> sheetIterator; | |||
final Iterator<XSSFSheetRef> sheetIterator; | |||
/** | |||
* Construct a new SheetIterator | |||
* | |||
* @param wb package part holding workbook.xml | |||
*/ | |||
private SheetIterator(PackagePart wb) throws IOException { | |||
SheetIterator(PackagePart wb) throws IOException { | |||
/** | |||
* The order of sheets is defined by the order of CTSheet elements in workbook.xml | |||
@@ -228,25 +232,44 @@ public class XSSFReader { | |||
sheetMap.put(rel.getId(), pkg.getPart(relName)); | |||
} | |||
} | |||
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator | |||
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search | |||
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook(); | |||
List<CTSheet> validSheets = new ArrayList<CTSheet>(); | |||
for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) { | |||
//if there's no relationship id, silently skip the sheet | |||
String sheetId = ctSheet.getId(); | |||
if (sheetId != null && sheetId.length() > 0) { | |||
validSheets.add(ctSheet); | |||
} | |||
} | |||
sheetIterator = validSheets.iterator(); | |||
//step 2. Read array of CTSheet elements, wrap it in a LinkedList | |||
//and construct an iterator | |||
sheetIterator = createSheetIteratorFromWB(wb); | |||
} catch (InvalidFormatException e){ | |||
throw new POIXMLException(e); | |||
} catch (XmlException e){ | |||
} | |||
} | |||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException { | |||
XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader(); | |||
XMLReader xmlReader = null; | |||
try { | |||
xmlReader = SAXHelper.newXMLReader(); | |||
} catch (ParserConfigurationException e) { | |||
throw new POIXMLException(e); | |||
} catch (SAXException e) { | |||
throw new POIXMLException(e); | |||
} | |||
xmlReader.setContentHandler(xmlSheetRefReader); | |||
try { | |||
xmlReader.parse(new InputSource(wb.getInputStream())); | |||
} catch (SAXException e) { | |||
throw new POIXMLException(e); | |||
} | |||
List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>(); | |||
for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) { | |||
//if there's no relationship id, silently skip the sheet | |||
String sheetId = xssfSheetRef.getId(); | |||
if (sheetId != null && sheetId.length() > 0) { | |||
validSheets.add(xssfSheetRef); | |||
} | |||
} | |||
return validSheets.iterator(); | |||
} | |||
/** | |||
* Returns <tt>true</tt> if the iteration has more elements. | |||
* | |||
@@ -264,9 +287,9 @@ public class XSSFReader { | |||
*/ | |||
@Override | |||
public InputStream next() { | |||
ctSheet = sheetIterator.next(); | |||
xssfSheetRef = sheetIterator.next(); | |||
String sheetId = ctSheet.getId(); | |||
String sheetId = xssfSheetRef.getId(); | |||
try { | |||
PackagePart sheetPkg = sheetMap.get(sheetId); | |||
return sheetPkg.getInputStream(); | |||
@@ -281,7 +304,7 @@ public class XSSFReader { | |||
* @return name of the current sheet | |||
*/ | |||
public String getSheetName() { | |||
return ctSheet.getName(); | |||
return xssfSheetRef.getName(); | |||
} | |||
/** | |||
@@ -344,7 +367,7 @@ public class XSSFReader { | |||
} | |||
public PackagePart getSheetPart() { | |||
String sheetId = ctSheet.getId(); | |||
String sheetId = xssfSheetRef.getId(); | |||
return sheetMap.get(sheetId); | |||
} | |||
@@ -356,4 +379,52 @@ public class XSSFReader { | |||
throw new IllegalStateException("Not supported"); | |||
} | |||
} | |||
protected final static class XSSFSheetRef { | |||
//do we need to store sheetId, too? | |||
private final String id; | |||
private final String name; | |||
public XSSFSheetRef(String id, String name) { | |||
this.id = id; | |||
this.name = name; | |||
} | |||
public String getId() { | |||
return id; | |||
} | |||
public String getName() { | |||
return name; | |||
} | |||
} | |||
//scrapes sheet reference info and order from workbook.xml | |||
private static class XMLSheetRefReader extends DefaultHandler { | |||
private final static String SHEET = "sheet"; | |||
private final static String ID = "id"; | |||
private final static String NAME = "name"; | |||
private final List<XSSFSheetRef> sheetRefs = new LinkedList(); | |||
@Override | |||
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { | |||
if (localName.toLowerCase(Locale.US).equals(SHEET)) { | |||
String name = null; | |||
String id = null; | |||
for (int i = 0; i < attrs.getLength(); i++) { | |||
if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) { | |||
name = attrs.getValue(i); | |||
} else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) { | |||
id = attrs.getValue(i); | |||
} | |||
sheetRefs.add(new XSSFSheetRef(id, name)); | |||
} | |||
} | |||
} | |||
List<XSSFSheetRef> getSheetRefs() { | |||
return Collections.unmodifiableList(sheetRefs); | |||
} | |||
} | |||
} |
@@ -0,0 +1,160 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.extractor; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import org.apache.poi.POIXMLTextExtractor; | |||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.ss.usermodel.DataFormatter; | |||
import org.apache.poi.xssf.binary.XSSFBCommentsTable; | |||
import org.apache.poi.xssf.binary.XSSFBHyperlinksTable; | |||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; | |||
import org.apache.poi.xssf.binary.XSSFBSheetHandler; | |||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||
import org.apache.poi.xssf.eventusermodel.XSSFBReader; | |||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.xml.sax.SAXException; | |||
/** | |||
* Implementation of a text extractor or xlsb Excel | |||
* files that uses SAX-like binary parsing. | |||
*/ | |||
public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor | |||
implements org.apache.poi.ss.extractor.ExcelExtractor { | |||
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { | |||
XSSFRelation.XLSB_BINARY_WORKBOOK | |||
}; | |||
private boolean handleHyperlinksInCells = false; | |||
public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { | |||
super(path); | |||
} | |||
public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException { | |||
super(container); | |||
} | |||
public static void main(String[] args) throws Exception { | |||
if (args.length < 1) { | |||
System.err.println("Use:"); | |||
System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>"); | |||
System.exit(1); | |||
} | |||
POIXMLTextExtractor extractor = | |||
new XSSFBEventBasedExcelExtractor(args[0]); | |||
System.out.println(extractor.getText()); | |||
extractor.close(); | |||
} | |||
public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) { | |||
this.handleHyperlinksInCells = handleHyperlinksInCells; | |||
} | |||
/** | |||
* Should we return the formula itself, and not | |||
* the result it produces? Default is false | |||
* This is currently unsupported for xssfb | |||
*/ | |||
@Override | |||
public void setFormulasNotResults(boolean formulasNotResults) { | |||
throw new IllegalArgumentException("Not currently supported"); | |||
} | |||
/** | |||
* Processes the given sheet | |||
*/ | |||
public void processSheet( | |||
SheetContentsHandler sheetContentsExtractor, | |||
XSSFBStylesTable styles, | |||
XSSFBCommentsTable comments, | |||
XSSFBSharedStringsTable strings, | |||
InputStream sheetInputStream) | |||
throws IOException, SAXException { | |||
DataFormatter formatter; | |||
if (locale == null) { | |||
formatter = new DataFormatter(); | |||
} else { | |||
formatter = new DataFormatter(locale); | |||
} | |||
XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler( | |||
sheetInputStream, | |||
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults | |||
); | |||
xssfbSheetHandler.parse(); | |||
} | |||
/** | |||
* Processes the file and returns the text | |||
*/ | |||
public String getText() { | |||
try { | |||
XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container); | |||
XSSFBReader xssfbReader = new XSSFBReader(container); | |||
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable(); | |||
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData(); | |||
StringBuffer text = new StringBuffer(); | |||
SheetTextExtractor sheetExtractor = new SheetTextExtractor(); | |||
XSSFBHyperlinksTable hyperlinksTable = null; | |||
while (iter.hasNext()) { | |||
InputStream stream = iter.next(); | |||
if (includeSheetNames) { | |||
text.append(iter.getSheetName()); | |||
text.append('\n'); | |||
} | |||
if (handleHyperlinksInCells) { | |||
hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart()); | |||
} | |||
XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null; | |||
processSheet(sheetExtractor, styles, comments, strings, stream); | |||
if (includeHeadersFooters) { | |||
sheetExtractor.appendHeaderText(text); | |||
} | |||
sheetExtractor.appendCellText(text); | |||
if (includeTextBoxes) { | |||
processShapes(iter.getShapes(), text); | |||
} | |||
if (includeHeadersFooters) { | |||
sheetExtractor.appendFooterText(text); | |||
} | |||
sheetExtractor.reset(); | |||
stream.close(); | |||
} | |||
return text.toString(); | |||
} catch (IOException e) { | |||
System.err.println(e); | |||
return null; | |||
} catch (SAXException se) { | |||
System.err.println(se); | |||
return null; | |||
} catch (OpenXML4JException o4je) { | |||
System.err.println(o4je); | |||
return null; | |||
} | |||
} | |||
} |
@@ -54,15 +54,15 @@ import org.xml.sax.XMLReader; | |||
*/ | |||
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
implements org.apache.poi.ss.extractor.ExcelExtractor { | |||
private OPCPackage container; | |||
OPCPackage container; | |||
private POIXMLProperties properties; | |||
private Locale locale; | |||
private boolean includeTextBoxes = true; | |||
private boolean includeSheetNames = true; | |||
private boolean includeCellComments = false; | |||
private boolean includeHeadersFooters = true; | |||
private boolean formulasNotResults = false; | |||
Locale locale; | |||
boolean includeTextBoxes = true; | |||
boolean includeSheetNames = true; | |||
boolean includeCellComments = false; | |||
boolean includeHeadersFooters = true; | |||
boolean formulasNotResults = false; | |||
private boolean concatenatePhoneticRuns = true; | |||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { | |||
@@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
} | |||
} | |||
private void processShapes(List<XSSFShape> shapes, StringBuffer text) { | |||
void processShapes(List<XSSFShape> shapes, StringBuffer text) { | |||
if (shapes == null){ | |||
return; | |||
} | |||
@@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
* @see XSSFExcelExtractor#getText() | |||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | |||
*/ | |||
private void appendHeaderText(StringBuffer buffer) { | |||
void appendHeaderText(StringBuffer buffer) { | |||
appendHeaderFooterText(buffer, "firstHeader"); | |||
appendHeaderFooterText(buffer, "oddHeader"); | |||
appendHeaderFooterText(buffer, "evenHeader"); | |||
@@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
* @see XSSFExcelExtractor#getText() | |||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | |||
*/ | |||
private void appendFooterText(StringBuffer buffer) { | |||
void appendFooterText(StringBuffer buffer) { | |||
// append the text for each footer type in the same order | |||
// they are appended in XSSFExcelExtractor | |||
appendHeaderFooterText(buffer, "firstFooter"); | |||
@@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
/** | |||
* Append the cell contents we have collected. | |||
*/ | |||
private void appendCellText(StringBuffer buffer) { | |||
void appendCellText(StringBuffer buffer) { | |||
checkMaxTextSize(buffer, output.toString()); | |||
buffer.append(output); | |||
} | |||
@@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | |||
/** | |||
* Reset this <code>SheetTextExtractor</code> for the next sheet. | |||
*/ | |||
private void reset() { | |||
void reset() { | |||
output.setLength(0); | |||
firstCellOfRow = true; | |||
if (headerFooterMap != null) { |
@@ -68,6 +68,7 @@ public class TestExtractorFactory { | |||
private static File xlsxStrict; | |||
private static File xltx; | |||
private static File xlsEmb; | |||
private static File xlsb; | |||
private static File doc; | |||
private static File doc6; | |||
@@ -108,6 +109,7 @@ public class TestExtractorFactory { | |||
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); | |||
xltx = getFileAndCheck(ssTests, "test.xltx"); | |||
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); | |||
xlsb = getFileAndCheck(ssTests, "testVarious.xlsb"); | |||
POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); | |||
doc = getFileAndCheck(wpTests, "SampleDoc.doc"); | |||
@@ -172,6 +174,13 @@ public class TestExtractorFactory { | |||
); | |||
extractor.close(); | |||
extractor = ExtractorFactory.createExtractor(xlsb); | |||
assertTrue( | |||
extractor.getText().contains("test") | |||
); | |||
extractor.close(); | |||
extractor = ExtractorFactory.createExtractor(xltx); | |||
assertTrue( | |||
extractor.getText().contains("test") |
@@ -0,0 +1,56 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import static org.junit.Assert.assertEquals; | |||
import java.util.List; | |||
import java.util.regex.Pattern; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.openxml4j.opc.PackagePart; | |||
import org.junit.Test; | |||
public class TestXSSFBSharedStringsTable { | |||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||
@Test | |||
public void testBasic() throws Exception { | |||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb")); | |||
List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin")); | |||
assertEquals(1, parts.size()); | |||
XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0)); | |||
List<String> strings = rtbl.getItems(); | |||
assertEquals(49, strings.size()); | |||
assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0)); | |||
assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3)); | |||
assertEquals(55, rtbl.getCount()); | |||
assertEquals(49, rtbl.getUniqueCount()); | |||
//TODO: add in tests for phonetic runs | |||
} | |||
} |
@@ -0,0 +1,54 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.binary; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertNotNull; | |||
import java.util.List; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.ss.util.CellAddress; | |||
import org.apache.poi.xssf.eventusermodel.XSSFBReader; | |||
import org.apache.poi.xssf.eventusermodel.XSSFReader; | |||
import org.junit.Test; | |||
public class TestXSSFBSheetHyperlinkManager { | |||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||
@Test | |||
public void testBasic() throws Exception { | |||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb")); | |||
XSSFBReader reader = new XSSFBReader(pkg); | |||
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData(); | |||
it.next(); | |||
XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart()); | |||
List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0)); | |||
assertNotNull(records); | |||
assertEquals(1, records.size()); | |||
XSSFHyperlinkRecord record = records.get(0); | |||
assertEquals("http://tika.apache.org/", record.getLocation()); | |||
assertEquals("rId2", record.getRelId()); | |||
} | |||
} |
@@ -0,0 +1,224 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.eventusermodel; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertNotNull; | |||
import static org.junit.Assert.fail; | |||
import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.List; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.ss.usermodel.DataFormatter; | |||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; | |||
import org.apache.poi.xssf.binary.XSSFBSheetHandler; | |||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||
import org.junit.Test; | |||
public class TestXSSFBReader { | |||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||
@Test | |||
public void testBasic() throws Exception { | |||
List<String> sheetTexts = getSheets("testVarious.xlsb"); | |||
assertEquals(1, sheetTexts.size()); | |||
String xsxml = sheetTexts.get(0); | |||
assertContains("This is a string", xsxml); | |||
assertContains("<td ref=\"B2\">13</td>", xsxml); | |||
assertContains("<td ref=\"B3\">13.12112313</td>", xsxml); | |||
assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml); | |||
assertContains("<td ref=\"B5\">20%</td>", xsxml); | |||
assertContains("<td ref=\"B6\">13.12</td>", xsxml); | |||
assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml); | |||
assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml); | |||
assertContains("46/1963", xsxml);//custom format 1 | |||
assertContains("3/128", xsxml);//custom format 2 | |||
assertContains("<tr num=\"7>\n" + | |||
"\t<td ref=\"A8\">longer int</td>\n" + | |||
"\t<td ref=\"B8\">1.23457E+15</td>\n" + | |||
"\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||
"test comment2</span></td>\n" + | |||
"</tr num=\"7>", xsxml); | |||
assertContains("<tr num=\"34>\n" + | |||
"\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||
"comment6 actually in cell</span></td>\n" + | |||
"</tr num=\"34>", xsxml); | |||
assertContains("<tr num=\"64>\n" + | |||
"\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||
"comment7 end of file</span></td>\n" + | |||
"</tr num=\"64>", xsxml); | |||
assertContains("<tr num=\"65>\n" + | |||
"\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||
"comment8 end of file</span></td>\n" + | |||
"</tr num=\"65>", xsxml); | |||
assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml); | |||
assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml); | |||
assertContains( | |||
"<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>", | |||
xsxml); | |||
assertContains( | |||
"<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>", | |||
xsxml); | |||
assertContains( | |||
"<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>", | |||
xsxml); | |||
assertContains( | |||
"<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>", | |||
xsxml); | |||
} | |||
@Test | |||
public void testComments() throws Exception { | |||
List<String> sheetTexts = getSheets("comments.xlsb"); | |||
String xsxml = sheetTexts.get(0); | |||
assertContains( | |||
"<tr num=\"0>\n" + | |||
"\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" + | |||
"\t<td ref=\"B1\">row1</td>\n" + | |||
"</tr num=\"0>", xsxml); | |||
assertContains( | |||
"<tr num=\"1>\n" + | |||
"\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||
"comment row2 (index1)</span></td>\n" + | |||
"</tr num=\"1>", | |||
xsxml); | |||
assertContains("<tr num=\"2>\n" + | |||
"\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" + | |||
"\t<td ref=\"B3\">row3</td>\n", xsxml); | |||
assertContains("<tr num=\"3>\n" + | |||
"\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" + | |||
"\t<td ref=\"B4\">row4</td>\n" + | |||
"</tr num=\"3></sheet>", xsxml); | |||
} | |||
private List<String> getSheets(String testFileName) throws Exception { | |||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName)); | |||
List<String> sheetTexts = new ArrayList<String>(); | |||
XSSFBReader r = new XSSFBReader(pkg); | |||
// assertNotNull(r.getWorkbookData()); | |||
// assertNotNull(r.getSharedStringsData()); | |||
assertNotNull(r.getXSSFBStylesTable()); | |||
XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg); | |||
XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable(); | |||
XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData(); | |||
while (it.hasNext()) { | |||
InputStream is = it.next(); | |||
String name = it.getSheetName(); | |||
TestSheetHandler testSheetHandler = new TestSheetHandler(); | |||
testSheetHandler.startSheet(name); | |||
XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is, | |||
xssfbStylesTable, | |||
it.getXSSFBSheetComments(), | |||
sst, testSheetHandler, | |||
new DataFormatter(), | |||
false); | |||
sheetHandler.parse(); | |||
testSheetHandler.endSheet(); | |||
sheetTexts.add(testSheetHandler.toString()); | |||
} | |||
return sheetTexts; | |||
} | |||
//This converts all [\r\n\t]+ to " " | |||
private void assertContains(String needle, String haystack) { | |||
needle = needle.replaceAll("[\r\n\t]+", " "); | |||
haystack = haystack.replaceAll("[\r\n\t]+", " "); | |||
if (haystack.indexOf(needle) < 0) { | |||
fail("couldn't find >"+needle+"< in: "+haystack ); | |||
} | |||
} | |||
@Test | |||
public void testDate() throws Exception { | |||
List<String> sheets = getSheets("date.xlsb"); | |||
assertEquals(1, sheets.size()); | |||
assertContains("1/12/13", sheets.get(0)); | |||
} | |||
private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler { | |||
private final StringBuilder sb = new StringBuilder(); | |||
public void startSheet(String sheetName) { | |||
sb.append("<sheet name=\"").append(sheetName).append(">"); | |||
} | |||
public void endSheet(){ | |||
sb.append("</sheet>"); | |||
} | |||
@Override | |||
public void startRow(int rowNum) { | |||
sb.append("\n<tr num=\"").append(rowNum).append(">"); | |||
} | |||
@Override | |||
public void endRow(int rowNum) { | |||
sb.append("\n</tr num=\"").append(rowNum).append(">"); | |||
} | |||
@Override | |||
public void cell(String cellReference, String formattedValue, XSSFComment comment) { | |||
formattedValue = (formattedValue == null) ? "" : formattedValue; | |||
if (comment == null) { | |||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>"); | |||
} else { | |||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">") | |||
.append(formattedValue) | |||
.append("<span type=\"comment\" author=\"") | |||
.append(comment.getAuthor()).append("\">") | |||
.append(comment.getString().toString().trim()).append("</span>") | |||
.append("</td>"); | |||
} | |||
} | |||
@Override | |||
public void headerFooter(String text, boolean isHeader, String tagName) { | |||
if (isHeader) { | |||
sb.append("<header tagName=\""+tagName+"\">"+text+"</header>"); | |||
} else { | |||
sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>"); | |||
} | |||
} | |||
@Override | |||
public String toString() { | |||
return sb.toString(); | |||
} | |||
} | |||
} |
@@ -0,0 +1,102 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.extractor; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertTrue; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
import org.junit.Test; | |||
/** | |||
* Tests for {@link XSSFBEventBasedExcelExtractor} | |||
*/ | |||
public class TestXSSFBEventBasedExcelExtractor { | |||
protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { | |||
return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples. | |||
openSamplePackage(sampleName)); | |||
} | |||
/** | |||
* Get text out of the simple file | |||
*/ | |||
@Test | |||
public void testGetSimpleText() throws Exception { | |||
// a very simple file | |||
XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb"); | |||
extractor.setIncludeCellComments(true); | |||
extractor.getText(); | |||
String text = extractor.getText(); | |||
assertTrue(text.length() > 0); | |||
// Check sheet names | |||
assertTrue(text.startsWith("Sheet1")); | |||
assertTrue(text.endsWith("Sheet3\n")); | |||
// Now without, will have text | |||
extractor.setIncludeSheetNames(false); | |||
text = extractor.getText(); | |||
String CHUNK1 = | |||
"Lorem\t111\n" + | |||
"ipsum\t222\n" + | |||
"dolor\t333\n" + | |||
"sit\t444\n" + | |||
"amet\t555\n" + | |||
"consectetuer\t666\n" + | |||
"adipiscing\t777\n" + | |||
"elit\t888\n" + | |||
"Nunc\t999\n"; | |||
String CHUNK2 = | |||
"The quick brown fox jumps over the lazy dog\n" + | |||
"hello, xssf hello, xssf\n" + | |||
"hello, xssf hello, xssf\n" + | |||
"hello, xssf hello, xssf\n" + | |||
"hello, xssf hello, xssf\n"; | |||
assertEquals( | |||
CHUNK1 + | |||
"at\t4995\n" + | |||
CHUNK2 | |||
, text); | |||
} | |||
/** | |||
* Test text extraction from text box using getShapes() | |||
* | |||
* @throws Exception | |||
*/ | |||
@Test | |||
public void testShapes() throws Exception { | |||
XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb"); | |||
try { | |||
String text = ooxmlExtractor.getText(); | |||
assertTrue(text.indexOf("Line 1") > -1); | |||
assertTrue(text.indexOf("Line 2") > -1); | |||
assertTrue(text.indexOf("Line 3") > -1); | |||
} finally { | |||
ooxmlExtractor.close(); | |||
} | |||
} | |||
} |