git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68tags/REL_3_16_FINAL
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; | import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; | ||||
import org.apache.poi.xslf.usermodel.XSLFRelation; | import org.apache.poi.xslf.usermodel.XSLFRelation; | ||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow; | import org.apache.poi.xslf.usermodel.XSLFSlideShow; | ||||
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; | |||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; | import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; | ||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor; | import org.apache.poi.xssf.extractor.XSSFExcelExtractor; | ||||
import org.apache.poi.xssf.usermodel.XSSFRelation; | import org.apache.poi.xssf.usermodel.XSSFRelation; | ||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); | return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); | ||||
} | } | ||||
// How about xlsb? | |||||
for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) { | |||||
if (rel.getContentType().equals(contentType)) { | |||||
return new XSSFBEventBasedExcelExtractor(pkg); | |||||
} | |||||
} | |||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); | throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")"); | ||||
} catch (IOException e) { | } catch (IOException e) { |
import org.apache.poi.UnsupportedFileFormatException; | import org.apache.poi.UnsupportedFileFormatException; | ||||
/** | /** | ||||
* We don't support .xlsb files, sorry | |||||
* We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}. | |||||
* As of POI 3.15-beta3, we do support streaming reading of xlsb files | |||||
* via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader} | |||||
*/ | */ | ||||
public class XLSBUnsupportedException extends UnsupportedFileFormatException { | public class XLSBUnsupportedException extends UnsupportedFileFormatException { | ||||
private static final long serialVersionUID = 7849681804154571175L; | private static final long serialVersionUID = 7849681804154571175L; |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.ss.util.CellReference; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
/** | |||||
* This class encapsulates what the spec calls a "Cell" object. | |||||
* I added "Header" to clarify that this does not contain the contents | |||||
* of the cell, only the column number, the style id and the phonetic boolean | |||||
*/ | |||||
@Internal | |||||
class XSSFBCellHeader { | |||||
public static int length = 8; | |||||
/** | |||||
* | |||||
* @param data raw data | |||||
* @param offset offset at which to start reading the record | |||||
* @param currentRow 0-based current row count | |||||
* @param cell cell buffer to update | |||||
*/ | |||||
public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) { | |||||
long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||||
int styleIdx = XSSFBUtils.get24BitInt(data, offset); offset += 3; | |||||
//TODO: range checking | |||||
boolean showPhonetic = false;//TODO: fill this out | |||||
cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic); | |||||
} | |||||
private int rowNum; | |||||
private int colNum; | |||||
private int styleIdx; | |||||
private boolean showPhonetic; | |||||
public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) { | |||||
this.rowNum = rowNum; | |||||
this.colNum = colNum; | |||||
this.styleIdx = styleIdx; | |||||
this.showPhonetic = showPhonetic; | |||||
} | |||||
int getColNum() { | |||||
return colNum; | |||||
} | |||||
String formatAddressAsString() { | |||||
return CellReference.convertNumToColString(colNum)+(rowNum+1); | |||||
} | |||||
int getStyleIdx() { | |||||
return styleIdx; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
@Internal | |||||
class XSSFBCellRange { | |||||
public final static int length = 4* LittleEndian.INT_SIZE; | |||||
/** | |||||
* Parses an RfX cell range from the data starting at the offset. | |||||
* This performs no range checking. | |||||
* @param data raw bytes | |||||
* @param offset offset at which to start reading from data | |||||
* @param cellRange to overwrite. If null, a new cellRange will be created. | |||||
* @return a mutable cell range. | |||||
*/ | |||||
public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) { | |||||
if (cellRange == null) { | |||||
cellRange = new XSSFBCellRange(); | |||||
} | |||||
cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||||
cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||||
cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE; | |||||
cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); | |||||
return cellRange; | |||||
} | |||||
int firstRow; | |||||
int lastRow; | |||||
int firstCol; | |||||
int lastCol; | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.ss.usermodel.ClientAnchor; | |||||
import org.apache.poi.ss.usermodel.RichTextString; | |||||
import org.apache.poi.ss.util.CellAddress; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||||
@Internal | |||||
class XSSFBComment extends XSSFComment { | |||||
private final CellAddress cellAddress; | |||||
private final String author; | |||||
private final XSSFBRichTextString comment; | |||||
private boolean visible = true; | |||||
XSSFBComment(CellAddress cellAddress, String author, String comment) { | |||||
super(null, null, null); | |||||
this.cellAddress = cellAddress; | |||||
this.author = author; | |||||
this.comment = new XSSFBRichTextString(comment); | |||||
} | |||||
@Override | |||||
public void setVisible(boolean visible) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only."); | |||||
} | |||||
@Override | |||||
public boolean isVisible() { | |||||
return visible; | |||||
} | |||||
@Override | |||||
public CellAddress getAddress() { | |||||
return cellAddress; | |||||
} | |||||
@Override | |||||
public void setAddress(CellAddress addr) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public void setAddress(int row, int col) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public int getRow() { | |||||
return cellAddress.getRow(); | |||||
} | |||||
@Override | |||||
public void setRow(int row) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public int getColumn() { | |||||
return cellAddress.getColumn(); | |||||
} | |||||
@Override | |||||
public void setColumn(int col) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public String getAuthor() { | |||||
return author; | |||||
} | |||||
@Override | |||||
public void setAuthor(String author) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public XSSFBRichTextString getString() { | |||||
return comment; | |||||
} | |||||
@Override | |||||
public void setString(RichTextString string) { | |||||
throw new IllegalArgumentException("XSSFBComment is read only"); | |||||
} | |||||
@Override | |||||
public ClientAnchor getClientAnchor() { | |||||
return null; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.Comparator; | |||||
import java.util.LinkedList; | |||||
import java.util.List; | |||||
import java.util.Map; | |||||
import java.util.Queue; | |||||
import java.util.TreeMap; | |||||
import org.apache.poi.ss.util.CellAddress; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
@Internal | |||||
public class XSSFBCommentsTable extends XSSFBParser { | |||||
private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//String is the cellAddress A1 | |||||
private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>(); | |||||
private List<String> authors = new ArrayList<String>(); | |||||
//these are all used only during parsing, and they are mutable! | |||||
private int authorId = -1; | |||||
private CellAddress cellAddress = null; | |||||
private XSSFBCellRange cellRange = null; | |||||
private String comment = null; | |||||
private StringBuilder authorBuffer = new StringBuilder(); | |||||
public XSSFBCommentsTable(InputStream is) throws IOException { | |||||
super(is); | |||||
parse(); | |||||
commentAddresses.addAll(comments.keySet()); | |||||
} | |||||
@Override | |||||
public void handleRecord(int id, byte[] data) throws XSSFBParseException { | |||||
XSSFBRecordType recordType = XSSFBRecordType.lookup(id); | |||||
switch (recordType) { | |||||
case BrtBeginComment: | |||||
int offset = 0; | |||||
authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data)); offset += LittleEndian.INT_SIZE; | |||||
cellRange = XSSFBCellRange.parse(data, offset, cellRange); | |||||
offset+= XSSFBCellRange.length; | |||||
//for strict parsing; confirm that firstRow==lastRow and firstCol==colLats (2.4.28) | |||||
cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol); | |||||
break; | |||||
case BrtCommentText: | |||||
XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0); | |||||
comment = xssfbRichStr.getString(); | |||||
break; | |||||
case BrtEndComment: | |||||
comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment)); | |||||
authorId = -1; | |||||
cellAddress = null; | |||||
break; | |||||
case BrtCommentAuthor: | |||||
authorBuffer.setLength(0); | |||||
XSSFBUtils.readXLWideString(data, 0, authorBuffer); | |||||
authors.add(authorBuffer.toString()); | |||||
break; | |||||
} | |||||
} | |||||
public Queue<CellAddress> getAddresses() { | |||||
return commentAddresses; | |||||
} | |||||
public XSSFBComment get(CellAddress cellAddress) { | |||||
if (cellAddress == null) { | |||||
return null; | |||||
} | |||||
return comments.get(cellAddress); | |||||
} | |||||
private final static class CellAddressComparator implements Comparator<CellAddress> { | |||||
@Override | |||||
public int compare(CellAddress o1, CellAddress o2) { | |||||
if (o1.getRow() < o2.getRow()) { | |||||
return -1; | |||||
} else if (o1.getRow() > o2.getRow()) { | |||||
return 1; | |||||
} | |||||
if (o1.getColumn() < o2.getColumn()) { | |||||
return -1; | |||||
} else if (o1.getColumn() > o2.getColumn()) { | |||||
return 1; | |||||
} | |||||
return 0; | |||||
} | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper; | |||||
@Internal | |||||
class XSSFBHeaderFooter { | |||||
private final String headerFooterTypeLabel; | |||||
private final boolean isHeader; | |||||
private String rawString; | |||||
private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper(); | |||||
XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) { | |||||
this.headerFooterTypeLabel = headerFooterTypeLabel; | |||||
this.isHeader = isHeader; | |||||
} | |||||
String getHeaderFooterTypeLabel() { | |||||
return headerFooterTypeLabel; | |||||
} | |||||
String getRawString() { | |||||
return rawString; | |||||
} | |||||
String getString() { | |||||
StringBuilder sb = new StringBuilder(); | |||||
String left = headerFooterHelper.getLeftSection(rawString); | |||||
String center = headerFooterHelper.getCenterSection(rawString); | |||||
String right = headerFooterHelper.getRightSection(rawString); | |||||
if (left != null && left.length() > 0) { | |||||
sb.append(left); | |||||
} | |||||
if (center != null && center.length() > 0) { | |||||
if (sb.length() > 0) { | |||||
sb.append(" "); | |||||
} | |||||
sb.append(center); | |||||
} | |||||
if (right != null && right.length() > 0) { | |||||
if (sb.length() > 0) { | |||||
sb.append(" "); | |||||
} | |||||
sb.append(right); | |||||
} | |||||
return sb.toString(); | |||||
} | |||||
void setRawString(String rawString) { | |||||
this.rawString = rawString; | |||||
} | |||||
boolean isHeader() { | |||||
return isHeader; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.util.Internal; | |||||
@Internal | |||||
class XSSFBHeaderFooters { | |||||
public static XSSFBHeaderFooters parse(byte[] data) { | |||||
boolean diffOddEven = false; | |||||
boolean diffFirst = false; | |||||
boolean scaleWDoc = false; | |||||
boolean alignMargins = false; | |||||
int offset = 2; | |||||
XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters(); | |||||
xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true); | |||||
xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false); | |||||
xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true); | |||||
xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false); | |||||
xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true); | |||||
xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false); | |||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header); | |||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer); | |||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven); | |||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven); | |||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst); | |||||
readHeaderFooter(data, offset, xssfbHeaderFooter.footerFirst); | |||||
return xssfbHeaderFooter; | |||||
} | |||||
private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) { | |||||
if (offset + 4 >= data.length) { | |||||
return 0; | |||||
} | |||||
StringBuilder sb = new StringBuilder(); | |||||
int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb); | |||||
headerFooter.setRawString(sb.toString()); | |||||
return bytesRead; | |||||
} | |||||
private XSSFBHeaderFooter header; | |||||
private XSSFBHeaderFooter footer; | |||||
private XSSFBHeaderFooter headerEven; | |||||
private XSSFBHeaderFooter footerEven; | |||||
private XSSFBHeaderFooter headerFirst; | |||||
private XSSFBHeaderFooter footerFirst; | |||||
public XSSFBHeaderFooter getHeader() { | |||||
return header; | |||||
} | |||||
public XSSFBHeaderFooter getFooter() { | |||||
return footer; | |||||
} | |||||
public XSSFBHeaderFooter getHeaderEven() { | |||||
return headerEven; | |||||
} | |||||
public XSSFBHeaderFooter getFooterEven() { | |||||
return footerEven; | |||||
} | |||||
public XSSFBHeaderFooter getHeaderFirst() { | |||||
return headerFirst; | |||||
} | |||||
public XSSFBHeaderFooter getFooterFirst() { | |||||
return footerFirst; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.BitSet; | |||||
import java.util.Comparator; | |||||
import java.util.HashMap; | |||||
import java.util.List; | |||||
import java.util.Map; | |||||
import java.util.TreeMap; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||||
import org.apache.poi.ss.util.CellAddress; | |||||
import org.apache.poi.ss.util.CellRangeAddress; | |||||
import org.apache.poi.ss.util.CellRangeUtil; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||||
@Internal | |||||
public class XSSFBHyperlinksTable { | |||||
private final static BitSet RECORDS = new BitSet(); | |||||
static { | |||||
RECORDS.set(XSSFBRecordType.BrtHLink.getId()); | |||||
} | |||||
private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>(); | |||||
//cache the relId to hyperlink url from the sheet's .rels | |||||
private Map<String, String> relIdToHyperlink = new HashMap<String, String>(); | |||||
public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException { | |||||
//load the urls from the sheet .rels | |||||
loadUrlsFromSheetRels(sheetPart); | |||||
//now load the hyperlinks from the bottom of the sheet | |||||
HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetPart.getInputStream()); | |||||
scraper.parse(); | |||||
} | |||||
/** | |||||
* | |||||
* @return a map of the hyperlinks. The key is the top left cell address in their CellRange | |||||
*/ | |||||
public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() { | |||||
Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap = | |||||
new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator()); | |||||
for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) { | |||||
CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(), | |||||
hyperlinkRecord.getCellRangeAddress().getFirstColumn()); | |||||
List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress); | |||||
if (list == null) { | |||||
list = new ArrayList<XSSFHyperlinkRecord>(); | |||||
} | |||||
list.add(hyperlinkRecord); | |||||
hyperlinkMap.put(cellAddress, list); | |||||
} | |||||
return hyperlinkMap; | |||||
} | |||||
/** | |||||
* | |||||
* @param cellAddress cell address to find | |||||
* @return null if not a hyperlink | |||||
*/ | |||||
public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) { | |||||
List<XSSFHyperlinkRecord> overlapping = null; | |||||
CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(), | |||||
cellAddress.getRow(), | |||||
cellAddress.getColumn(), | |||||
cellAddress.getColumn()); | |||||
for (XSSFHyperlinkRecord record : hyperlinkRecords) { | |||||
if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) { | |||||
if (overlapping == null) { | |||||
overlapping = new ArrayList<XSSFHyperlinkRecord>(); | |||||
} | |||||
overlapping.add(record); | |||||
} | |||||
} | |||||
return overlapping; | |||||
} | |||||
private void loadUrlsFromSheetRels(PackagePart sheetPart) { | |||||
try { | |||||
for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) { | |||||
relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString()); | |||||
} | |||||
} catch (InvalidFormatException e) { | |||||
//swallow | |||||
} | |||||
} | |||||
private class HyperlinkSheetScraper extends XSSFBParser { | |||||
private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange(); | |||||
private final StringBuilder xlWideStringBuffer = new StringBuilder(); | |||||
HyperlinkSheetScraper(InputStream is) { | |||||
super(is, RECORDS); | |||||
} | |||||
@Override | |||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||||
if (recordType != XSSFBRecordType.BrtHLink.getId()) { | |||||
return; | |||||
} | |||||
int offset = 0; | |||||
String relId = ""; | |||||
String location = ""; | |||||
String toolTip = ""; | |||||
String display = ""; | |||||
hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange); | |||||
offset += XSSFBCellRange.length; | |||||
xlWideStringBuffer.setLength(0); | |||||
offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer); | |||||
relId = xlWideStringBuffer.toString(); | |||||
xlWideStringBuffer.setLength(0); | |||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||||
location = xlWideStringBuffer.toString(); | |||||
xlWideStringBuffer.setLength(0); | |||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||||
toolTip = xlWideStringBuffer.toString(); | |||||
xlWideStringBuffer.setLength(0); | |||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer); | |||||
display = xlWideStringBuffer.toString(); | |||||
CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol); | |||||
String url = relIdToHyperlink.get(relId); | |||||
if (location == null || location.length() == 0) { | |||||
location = url; | |||||
} | |||||
hyperlinkRecords.add( | |||||
new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display) | |||||
); | |||||
} | |||||
} | |||||
private static class TopLeftCellAddressComparator implements Comparator<CellAddress> { | |||||
@Override | |||||
public int compare(CellAddress o1, CellAddress o2) { | |||||
if (o1.getRow() < o2.getRow()) { | |||||
return -1; | |||||
} else if (o1.getRow() > o2.getRow()) { | |||||
return 1; | |||||
} | |||||
if (o1.getColumn() < o2.getColumn()) { | |||||
return -1; | |||||
} else if (o1.getColumn() > o2.getColumn()) { | |||||
return 1; | |||||
} | |||||
return 0; | |||||
} | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
/** | |||||
* Parse exception while reading an xssfb | |||||
*/ | |||||
public class XSSFBParseException extends RuntimeException {

    // Declared explicitly because RuntimeException is Serializable.
    private static final long serialVersionUID = 1L;

    /**
     * @param msg description of what could not be parsed
     */
    public XSSFBParseException(String msg) {
        super(msg);
    }

    /**
     * @param msg description of what could not be parsed
     * @param cause underlying failure, kept so the original stack trace is not lost
     */
    public XSSFBParseException(String msg, Throwable cause) {
        super(msg, cause);
    }
}
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.BitSet; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndianInputStream; | |||||
/** | |||||
* Experimental parser for Microsoft's ooxml xssfb format. | |||||
* Not thread safe, obviously. Need to create a new one | |||||
* for each thread. | |||||
*/ | |||||
@Internal | |||||
public abstract class XSSFBParser { | |||||
private final LittleEndianInputStream is; | |||||
private final BitSet records; | |||||
public XSSFBParser(InputStream is) { | |||||
this.is = new LittleEndianInputStream(is); | |||||
records = null; | |||||
} | |||||
XSSFBParser(InputStream is, BitSet bitSet) { | |||||
this.is = new LittleEndianInputStream(is); | |||||
records = bitSet; | |||||
} | |||||
public void parse() throws IOException { | |||||
while (true) { | |||||
int bInt = is.read(); | |||||
if (bInt == -1) { | |||||
return; | |||||
} | |||||
readNext((byte) bInt); | |||||
} | |||||
} | |||||
private void readNext(byte b1) throws IOException { | |||||
int recordId = 0; | |||||
//if highest bit == 1 | |||||
if ((b1 >> 7 & 1) == 1) { | |||||
byte b2 = is.readByte(); | |||||
b1 &= ~(1<<7); //unset highest bit | |||||
b2 &= ~(1<<7); //unset highest bit (if it exists?) | |||||
recordId = (128*(int)b2)+(int)b1; | |||||
} else { | |||||
recordId = (int)b1; | |||||
} | |||||
long recordLength = 0; | |||||
int i = 0; | |||||
boolean halt = false; | |||||
while (i < 4 && ! halt) { | |||||
byte b = is.readByte(); | |||||
halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue | |||||
b &= ~(1<<7); | |||||
recordLength += (int)b << (i*7); //multiply by 128^i | |||||
i++; | |||||
} | |||||
if (records == null || records.get(recordId)) { | |||||
//add sanity check for length? | |||||
byte[] buff = new byte[(int) recordLength]; | |||||
is.readFully(buff); | |||||
handleRecord(recordId, buff); | |||||
} else { | |||||
long length = is.skip(recordLength); | |||||
if (length != recordLength) { | |||||
throw new XSSFBParseException("End of file reached before expected.\t"+ | |||||
"Tried to skip "+recordLength + ", but only skipped "+length); | |||||
} | |||||
} | |||||
} | |||||
//It hurts, hurts, hurts to create a new byte array for every record. | |||||
//However, on a large Excel spreadsheet, this parser was 1/3 faster than | |||||
//the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf. | |||||
//The code is far cleaner to have the parser read all | |||||
//of the data rather than having every component promise that it read | |||||
//the correct amount. | |||||
abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException; | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.util.Internal; | |||||
@Internal | |||||
public enum XSSFBRecordType { | |||||
BrtCellBlank(1), | |||||
BrtCellRk(2), | |||||
BrtCellError(3), | |||||
BrtCellBool(4), | |||||
BrtCellReal(5), | |||||
BrtCellSt(6), | |||||
BrtCellIsst(7), | |||||
BrtFmlaString(8), | |||||
BrtFmlaNum(9), | |||||
BrtFmlaBool(10), | |||||
BrtFmlaError(11), | |||||
BrtRowHdr(0), | |||||
BrtCellRString(62), | |||||
BrtBeginSheet(129), | |||||
BrtWsProp(147), | |||||
BrtWsDim(148), | |||||
BrtColInfo(60), | |||||
BrtBeginSheetData(145), | |||||
BrtEndSheetData(146), | |||||
BrtHLink(494), | |||||
BrtBeginHeaderFooter(479), | |||||
//comments | |||||
BrtBeginCommentAuthors(630), | |||||
BrtEndCommentAuthors(631), | |||||
BrtCommentAuthor(632), | |||||
BrtBeginComment(635), | |||||
BrtCommentText(637), | |||||
BrtEndComment(636), | |||||
//styles table | |||||
BrtXf(47), | |||||
BrtFmt(44), | |||||
BrtBeginFmts(615), | |||||
BrtEndFmts(616), | |||||
BrtBeginCellXFs(617), | |||||
BrtEndCellXFs(618), | |||||
BrtBeginCellStyleXFS(626), | |||||
BrtEndCellStyleXFS(627), | |||||
//stored strings table | |||||
BrtSstItem(19), //stored strings items | |||||
BrtBeginSst(159), //stored strings begin sst | |||||
BrtEndSst(160), //stored strings end sst | |||||
BrtBundleSh(156), //defines worksheet in wb part | |||||
Unimplemented(-1); | |||||
private final int id; | |||||
XSSFBRecordType(int id) { | |||||
this.id = id; | |||||
} | |||||
public int getId() { | |||||
return id; | |||||
} | |||||
public static XSSFBRecordType lookup(int id) { | |||||
for (XSSFBRecordType r : XSSFBRecordType.values()) { | |||||
if (r.id == id) { | |||||
return r; | |||||
} | |||||
} | |||||
return Unimplemented; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.Iterator; | |||||
import org.apache.poi.POIXMLDocumentPart; | |||||
import org.apache.poi.POIXMLRelation; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.apache.poi.openxml4j.opc.PackagePartName; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; | |||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.POILogFactory; | |||||
import org.apache.poi.util.POILogger; | |||||
/** | |||||
* Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation} | |||||
* because of conflicts with regular ooxml relations. | |||||
* If we failed to break this into a separate class, in the cases of SharedStrings and Styles, | |||||
* 2 parts would exist, and "Packages shall not contain equivalent part names..." | |||||
* <p> | |||||
* Also, we need to avoid the possibility of breaking the marshalling process for xml. | |||||
*/ | |||||
@Internal | |||||
public class XSSFBRelation extends POIXMLRelation { | |||||
private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class); | |||||
static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation( | |||||
"application/vnd.ms-excel.sharedStrings", | |||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings", | |||||
"/xl/sharedStrings.bin", | |||||
null | |||||
); | |||||
public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation( | |||||
"application/vnd.ms-excel.styles", | |||||
PackageRelationshipTypes.STYLE_PART, | |||||
"/xl/styles.bin", | |||||
null | |||||
); | |||||
private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) { | |||||
super(type, rel, defaultName, cls); | |||||
} | |||||
/** | |||||
* Fetches the InputStream to read the contents, based | |||||
* of the specified core part, for which we are defined | |||||
* as a suitable relationship | |||||
*/ | |||||
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException { | |||||
PackageRelationshipCollection prc = | |||||
corePart.getRelationshipsByType(getRelation()); | |||||
Iterator<PackageRelationship> it = prc.iterator(); | |||||
if (it.hasNext()) { | |||||
PackageRelationship rel = it.next(); | |||||
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); | |||||
PackagePart part = corePart.getPackage().getPart(relName); | |||||
return part.getInputStream(); | |||||
} | |||||
log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found"); | |||||
return null; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.util.Internal; | |||||
@Internal | |||||
class XSSFBRichStr { | |||||
public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException { | |||||
byte first = bytes[offset]; | |||||
boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1? | |||||
boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1? | |||||
StringBuilder sb = new StringBuilder(); | |||||
int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb); | |||||
//TODO: parse phonetic strings. | |||||
return new XSSFBRichStr(sb.toString(), ""); | |||||
} | |||||
private final String string; | |||||
private final String phoneticString; | |||||
XSSFBRichStr(String string, String phoneticString) { | |||||
this.string = string; | |||||
this.phoneticString = phoneticString; | |||||
} | |||||
public String getString() { | |||||
return string; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.ss.usermodel.Font; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |||||
/** | |||||
* Wrapper class around String so that we can use it in Comment. | |||||
* Nothing has been implemented yet except for {@link #getString()}. | |||||
*/ | |||||
@Internal | |||||
class XSSFBRichTextString extends XSSFRichTextString { | |||||
private final String string; | |||||
XSSFBRichTextString(String string) { | |||||
this.string = string; | |||||
} | |||||
@Override | |||||
public void applyFont(int startIndex, int endIndex, short fontIndex) { | |||||
} | |||||
@Override | |||||
public void applyFont(int startIndex, int endIndex, Font font) { | |||||
} | |||||
@Override | |||||
public void applyFont(Font font) { | |||||
} | |||||
@Override | |||||
public void clearFormatting() { | |||||
} | |||||
@Override | |||||
public String getString() { | |||||
return string; | |||||
} | |||||
@Override | |||||
public int length() { | |||||
return string.length(); | |||||
} | |||||
@Override | |||||
public int numFormattingRuns() { | |||||
return 0; | |||||
} | |||||
@Override | |||||
public int getIndexOfFormattingRun(int index) { | |||||
return 0; | |||||
} | |||||
@Override | |||||
public void applyFont(short fontIndex) { | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.List; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
import org.xml.sax.SAXException; | |||||
@Internal | |||||
public class XSSFBSharedStringsTable { | |||||
/** | |||||
* An integer representing the total count of strings in the workbook. This count does not | |||||
* include any numbers, it counts only the total of text strings in the workbook. | |||||
*/ | |||||
private int count; | |||||
/** | |||||
* An integer representing the total count of unique strings in the Shared String Table. | |||||
* A string is unique even if it is a copy of another string, but has different formatting applied | |||||
* at the character level. | |||||
*/ | |||||
private int uniqueCount; | |||||
/** | |||||
* The shared strings table. | |||||
*/ | |||||
private List<String> strings = new ArrayList<String>(); | |||||
/** | |||||
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table. | |||||
* @throws IOException If reading the data from the package fails. | |||||
* @throws SAXException if parsing the XML data fails. | |||||
*/ | |||||
public XSSFBSharedStringsTable(OPCPackage pkg) | |||||
throws IOException, SAXException { | |||||
ArrayList<PackagePart> parts = | |||||
pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType()); | |||||
// Some workbooks have no shared strings table. | |||||
if (parts.size() > 0) { | |||||
PackagePart sstPart = parts.get(0); | |||||
readFrom(sstPart.getInputStream()); | |||||
} | |||||
} | |||||
/** | |||||
* Like POIXMLDocumentPart constructor | |||||
* | |||||
* @since POI 3.14-Beta3 | |||||
*/ | |||||
XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException { | |||||
readFrom(part.getInputStream()); | |||||
} | |||||
private void readFrom(InputStream inputStream) throws IOException { | |||||
SSTBinaryReader reader = new SSTBinaryReader(inputStream); | |||||
reader.parse(); | |||||
} | |||||
public List<String> getItems() { | |||||
return strings; | |||||
} | |||||
public String getEntryAt(int i) { | |||||
return strings.get(i); | |||||
} | |||||
/** | |||||
* Return an integer representing the total count of strings in the workbook. This count does not | |||||
* include any numbers, it counts only the total of text strings in the workbook. | |||||
* | |||||
* @return the total count of strings in the workbook | |||||
*/ | |||||
public int getCount() { | |||||
return this.count; | |||||
} | |||||
/** | |||||
* Returns an integer representing the total count of unique strings in the Shared String Table. | |||||
* A string is unique even if it is a copy of another string, but has different formatting applied | |||||
* at the character level. | |||||
* | |||||
* @return the total count of unique strings in the workbook | |||||
*/ | |||||
public int getUniqueCount() { | |||||
return this.uniqueCount; | |||||
} | |||||
private class SSTBinaryReader extends XSSFBParser { | |||||
SSTBinaryReader(InputStream is) { | |||||
super(is); | |||||
} | |||||
@Override | |||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType); | |||||
switch (type) { | |||||
case BrtSstItem: | |||||
XSSFBRichStr rstr = XSSFBRichStr.build(data, 0); | |||||
strings.add(rstr.getString()); | |||||
break; | |||||
case BrtBeginSst: | |||||
count = (int) LittleEndian.getUInt(data,0); | |||||
uniqueCount = (int) LittleEndian.getUInt(data, 4); | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.InputStream; | |||||
import java.util.Queue; | |||||
import org.apache.poi.ss.usermodel.DataFormatter; | |||||
import org.apache.poi.ss.util.CellAddress; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; | |||||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |||||
@Internal | |||||
public class XSSFBSheetHandler extends XSSFBParser { | |||||
private final static int CHECK_ALL_ROWS = -1; | |||||
private final XSSFBSharedStringsTable stringsTable; | |||||
private final XSSFSheetXMLHandler.SheetContentsHandler handler; | |||||
private final XSSFBStylesTable styles; | |||||
private final XSSFBCommentsTable comments; | |||||
private final DataFormatter dataFormatter; | |||||
private final boolean formulasNotResults;//TODO: implement this | |||||
private int lastEndedRow = -1; | |||||
private int lastStartedRow = -1; | |||||
private int currentRow = 0; | |||||
private byte[] rkBuffer = new byte[8]; | |||||
private XSSFBCellRange hyperlinkCellRange = null; | |||||
private StringBuilder xlWideStringBuffer = new StringBuilder(); | |||||
private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader(); | |||||
public XSSFBSheetHandler(InputStream is, | |||||
XSSFBStylesTable styles, | |||||
XSSFBCommentsTable comments, | |||||
XSSFBSharedStringsTable strings, | |||||
XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, | |||||
DataFormatter dataFormatter, | |||||
boolean formulasNotResults) { | |||||
super(is); | |||||
this.styles = styles; | |||||
this.comments = comments; | |||||
this.stringsTable = strings; | |||||
this.handler = sheetContentsHandler; | |||||
this.dataFormatter = dataFormatter; | |||||
this.formulasNotResults = formulasNotResults; | |||||
} | |||||
@Override | |||||
public void handleRecord(int id, byte[] data) throws XSSFBParseException { | |||||
XSSFBRecordType type = XSSFBRecordType.lookup(id); | |||||
switch(type) { | |||||
case BrtRowHdr: | |||||
long rw = LittleEndian.getUInt(data, 0); | |||||
if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec? | |||||
throw new XSSFBParseException("Row number beyond allowable range: "+rw); | |||||
} | |||||
currentRow = (int)rw; | |||||
checkMissedComments(currentRow); | |||||
startRow(currentRow); | |||||
break; | |||||
case BrtCellIsst: | |||||
handleBrtCellIsst(data); | |||||
break; | |||||
case BrtCellSt: //TODO: needs test | |||||
handleCellSt(data); | |||||
break; | |||||
case BrtCellRk: | |||||
handleCellRk(data); | |||||
break; | |||||
case BrtCellReal: | |||||
handleCellReal(data); | |||||
break; | |||||
case BrtCellBool: | |||||
handleBoolean(data); | |||||
break; | |||||
case BrtCellError: | |||||
handleCellError(data); | |||||
break; | |||||
case BrtCellBlank: | |||||
beforeCellValue(data);//read cell info and check for missing comments | |||||
break; | |||||
case BrtFmlaString: | |||||
handleFmlaString(data); | |||||
break; | |||||
case BrtFmlaNum: | |||||
handleFmlaNum(data); | |||||
break; | |||||
case BrtFmlaError: | |||||
handleFmlaError(data); | |||||
break; | |||||
//TODO: All the PCDI and PCDIA | |||||
case BrtEndSheetData: | |||||
checkMissedComments(CHECK_ALL_ROWS); | |||||
endRow(lastStartedRow); | |||||
break; | |||||
case BrtBeginHeaderFooter: | |||||
handleHeaderFooter(data); | |||||
break; | |||||
} | |||||
} | |||||
private void beforeCellValue(byte[] data) { | |||||
XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer); | |||||
checkMissedComments(currentRow, cellBuffer.getColNum()); | |||||
} | |||||
private void handleCellValue(String formattedValue) { | |||||
CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum()); | |||||
XSSFBComment comment = null; | |||||
if (comments != null) { | |||||
comment = comments.get(cellAddress); | |||||
} | |||||
handler.cell(cellAddress.formatAsString(), formattedValue, comment); | |||||
} | |||||
private void handleFmlaNum(byte[] data) { | |||||
beforeCellValue(data); | |||||
//xNum | |||||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); | |||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||||
handleCellValue(formattedVal); | |||||
} | |||||
private void handleCellSt(byte[] data) { | |||||
beforeCellValue(data); | |||||
xlWideStringBuffer.setLength(0); | |||||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); | |||||
handleCellValue(xlWideStringBuffer.toString()); | |||||
} | |||||
private void handleFmlaString(byte[] data) { | |||||
beforeCellValue(data); | |||||
xlWideStringBuffer.setLength(0); | |||||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer); | |||||
handleCellValue(xlWideStringBuffer.toString()); | |||||
} | |||||
private void handleCellError(byte[] data) { | |||||
beforeCellValue(data); | |||||
//TODO, read byte to figure out the type of error | |||||
handleCellValue("ERROR"); | |||||
} | |||||
private void handleFmlaError(byte[] data) { | |||||
beforeCellValue(data); | |||||
//TODO, read byte to figure out the type of error | |||||
handleCellValue("ERROR"); | |||||
} | |||||
private void handleBoolean(byte[] data) { | |||||
beforeCellValue(data); | |||||
String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE"; | |||||
handleCellValue(formattedVal); | |||||
} | |||||
private void handleCellReal(byte[] data) { | |||||
beforeCellValue(data); | |||||
//xNum | |||||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length); | |||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||||
handleCellValue(formattedVal); | |||||
} | |||||
private void handleCellRk(byte[] data) { | |||||
beforeCellValue(data); | |||||
double val = rkNumber(data, XSSFBCellHeader.length); | |||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx()); | |||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString); | |||||
handleCellValue(formattedVal); | |||||
} | |||||
private void handleBrtCellIsst(byte[] data) { | |||||
beforeCellValue(data); | |||||
long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length); | |||||
//check for out of range, buffer overflow | |||||
XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx)); | |||||
handleCellValue(rtss.getString()); | |||||
} | |||||
private void handleHeaderFooter(byte[] data) { | |||||
XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data); | |||||
outputHeaderFooter(headerFooter.getHeader()); | |||||
outputHeaderFooter(headerFooter.getFooter()); | |||||
outputHeaderFooter(headerFooter.getHeaderEven()); | |||||
outputHeaderFooter(headerFooter.getFooterEven()); | |||||
outputHeaderFooter(headerFooter.getHeaderFirst()); | |||||
outputHeaderFooter(headerFooter.getFooterFirst()); | |||||
} | |||||
private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) { | |||||
String text = headerFooter.getString(); | |||||
if (text != null && text.trim().length() > 0) { | |||||
handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel()); | |||||
} | |||||
} | |||||
//at start of next cell or end of row, return the cellAddress if it equals currentRow and col | |||||
private void checkMissedComments(int currentRow, int colNum) { | |||||
if (comments == null) { | |||||
return; | |||||
} | |||||
Queue<CellAddress> queue = comments.getAddresses(); | |||||
while (queue.size() > 0) { | |||||
CellAddress cellAddress = queue.peek(); | |||||
if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) { | |||||
cellAddress = queue.remove(); | |||||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); | |||||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) { | |||||
queue.remove(); | |||||
return; | |||||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) { | |||||
return; | |||||
} else if (cellAddress.getRow() > currentRow) { | |||||
return; | |||||
} | |||||
} | |||||
} | |||||
//check for anything from rows before | |||||
private void checkMissedComments(int currentRow) { | |||||
if (comments == null) { | |||||
return; | |||||
} | |||||
Queue<CellAddress> queue = comments.getAddresses(); | |||||
int lastInterpolatedRow = -1; | |||||
while (queue.size() > 0) { | |||||
CellAddress cellAddress = queue.peek(); | |||||
if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) { | |||||
cellAddress = queue.remove(); | |||||
if (cellAddress.getRow() != lastInterpolatedRow) { | |||||
startRow(cellAddress.getRow()); | |||||
} | |||||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress)); | |||||
lastInterpolatedRow = cellAddress.getRow(); | |||||
} else { | |||||
break; | |||||
} | |||||
} | |||||
} | |||||
private void startRow(int row) { | |||||
if (row == lastStartedRow) { | |||||
return; | |||||
} | |||||
if (lastStartedRow != lastEndedRow) { | |||||
endRow(lastStartedRow); | |||||
} | |||||
handler.startRow(row); | |||||
lastStartedRow = row; | |||||
} | |||||
private void endRow(int row) { | |||||
if (lastEndedRow == row) { | |||||
return; | |||||
} | |||||
handler.endRow(row); | |||||
lastEndedRow = row; | |||||
} | |||||
private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) { | |||||
handler.cell(cellAddress.formatAsString(), null, comment); | |||||
} | |||||
private double rkNumber(byte[] data, int offset) { | |||||
//see 2.5.122 for this abomination | |||||
byte b0 = data[offset]; | |||||
String s = Integer.toString(b0, 2); | |||||
boolean numDivBy100 = ((b0 & 1) == 1); // else as is | |||||
boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer | |||||
//unset highest 2 bits | |||||
b0 &= ~1; | |||||
b0 &= ~(1<<1); | |||||
rkBuffer[4] = b0; | |||||
for (int i = 1; i < 4; i++) { | |||||
rkBuffer[i+4] = data[offset+i]; | |||||
} | |||||
double d = 0.0; | |||||
if (floatingPoint) { | |||||
d = LittleEndian.getDouble(rkBuffer); | |||||
} else { | |||||
d = LittleEndian.getInt(rkBuffer); | |||||
} | |||||
d = (numDivBy100) ? d/100 : d; | |||||
return d; | |||||
} | |||||
/** | |||||
* You need to implement this to handle the results | |||||
* of the sheet parsing. | |||||
*/ | |||||
public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler { | |||||
/** | |||||
* A cell, with the given formatted value (may be null), | |||||
* a url (may be null), a toolTip (may be null) | |||||
* and possibly a comment (may be null), was encountered */ | |||||
void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment); | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.List; | |||||
import java.util.SortedMap; | |||||
import java.util.TreeMap; | |||||
import org.apache.poi.POIXMLException; | |||||
import org.apache.poi.ss.usermodel.BuiltinFormats; | |||||
import org.apache.poi.util.Internal; | |||||
@Internal | |||||
public class XSSFBStylesTable extends XSSFBParser { | |||||
private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>(); | |||||
private final List<Short> styleIds = new ArrayList<Short>(); | |||||
private boolean inCellXFS = false; | |||||
private boolean inFmts = false; | |||||
public XSSFBStylesTable(InputStream is) throws IOException { | |||||
super(is); | |||||
parse(); | |||||
} | |||||
String getNumberFormatString(int idx) { | |||||
if (numberFormats.containsKey(styleIds.get((short)idx))) { | |||||
return numberFormats.get(styleIds.get((short)idx)); | |||||
} | |||||
return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx)); | |||||
} | |||||
@Override | |||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType); | |||||
switch (type) { | |||||
case BrtBeginCellXFs: | |||||
inCellXFS = true; | |||||
break; | |||||
case BrtEndCellXFs: | |||||
inCellXFS = false; | |||||
break; | |||||
case BrtXf: | |||||
if (inCellXFS) { | |||||
handleBrtXFInCellXF(data); | |||||
} | |||||
break; | |||||
case BrtBeginFmts: | |||||
inFmts = true; | |||||
break; | |||||
case BrtEndFmts: | |||||
inFmts = false; | |||||
break; | |||||
case BrtFmt: | |||||
if (inFmts) { | |||||
handleFormat(data); | |||||
} | |||||
break; | |||||
} | |||||
} | |||||
private void handleFormat(byte[] data) { | |||||
int ifmt = data[0] & 0xFF; | |||||
if (ifmt > Short.MAX_VALUE) { | |||||
throw new POIXMLException("Format id must be a short"); | |||||
} | |||||
StringBuilder sb = new StringBuilder(); | |||||
XSSFBUtils.readXLWideString(data, 2, sb); | |||||
String fmt = sb.toString(); | |||||
numberFormats.put((short)ifmt, fmt); | |||||
} | |||||
private void handleBrtXFInCellXF(byte[] data) { | |||||
int ifmtOffset = 2; | |||||
//int ifmtLength = 2; | |||||
//numFmtId in xml terms | |||||
int ifmt = data[ifmtOffset] & 0xFF;//the second byte is ignored | |||||
styleIds.add((short)ifmt); | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import java.nio.charset.Charset; | |||||
import org.apache.poi.POIXMLException; | |||||
import org.apache.poi.util.Internal; | |||||
import org.apache.poi.util.LittleEndian; | |||||
@Internal | |||||
public class XSSFBUtils { | |||||
/** | |||||
* Reads an XLNullableWideString. | |||||
* @param data data from which to read | |||||
* @param offset in data from which to start | |||||
* @param sb buffer to which to write. You must setLength(0) before calling! | |||||
* @return number of bytes read | |||||
* @throws XSSFBParseException if there was an exception during reading | |||||
*/ | |||||
static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { | |||||
long numChars = LittleEndian.getUInt(data, offset); | |||||
if (numChars < 0) { | |||||
throw new XSSFBParseException("too few chars to read"); | |||||
} else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!! | |||||
return 0; | |||||
} else if (numChars > 0xFFFFFFFFL) { | |||||
throw new XSSFBParseException("too many chars to read"); | |||||
} | |||||
int numBytes = 2*(int)numChars; | |||||
offset += 4; | |||||
if (offset+numBytes > data.length) { | |||||
throw new XSSFBParseException("trying to read beyond data length:" + | |||||
"offset="+offset+", numBytes="+numBytes+", data.length="+data.length); | |||||
} | |||||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); | |||||
numBytes+=4; | |||||
return numBytes; | |||||
} | |||||
/** | |||||
* Reads an XLNullableWideString. | |||||
* @param data data from which to read | |||||
* @param offset in data from which to start | |||||
* @param sb buffer to which to write. You must setLength(0) before calling! | |||||
* @return number of bytes read | |||||
* @throws XSSFBParseException if there was an exception while trying to read the string | |||||
*/ | |||||
public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException { | |||||
long numChars = LittleEndian.getUInt(data, offset); | |||||
if (numChars < 0) { | |||||
throw new XSSFBParseException("too few chars to read"); | |||||
} else if (numChars > 0xFFFFFFFFL) { | |||||
throw new XSSFBParseException("too many chars to read"); | |||||
} | |||||
int numBytes = 2*(int)numChars; | |||||
offset += 4; | |||||
if (offset+numBytes > data.length) { | |||||
throw new XSSFBParseException("trying to read beyond data length"); | |||||
} | |||||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE"))); | |||||
numBytes+=4; | |||||
return numBytes; | |||||
} | |||||
static int castToInt(long val) { | |||||
if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) { | |||||
return (int)val; | |||||
} | |||||
throw new POIXMLException("val ("+val+") can't be cast to int"); | |||||
} | |||||
static short castToShort(int val) { | |||||
if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) { | |||||
return (short)val; | |||||
} | |||||
throw new POIXMLException("val ("+val+") can't be cast to short"); | |||||
} | |||||
//TODO: move to LittleEndian? | |||||
static int get24BitInt( byte[] data, int offset) { | |||||
int i = offset; | |||||
int b0 = data[i++] & 0xFF; | |||||
int b1 = data[i++] & 0xFF; | |||||
int b2 = data[i] & 0xFF; | |||||
return ( b2 << 16 ) + ( b1 << 8 ) + b0; | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import org.apache.poi.ss.util.CellRangeAddress; | |||||
import org.apache.poi.util.Internal; | |||||
/** | |||||
* This is a read only record that maintains information about | |||||
* a hyperlink. In OOXML land, this information has to be merged | |||||
* from 1) the sheet's .rels to get the url and 2) from after the | |||||
* sheet data in they hyperlink section. | |||||
* | |||||
* The {@link #display} is often empty and should be filled from | |||||
* the contents of the anchor cell. | |||||
* | |||||
*/ | |||||
@Internal | |||||
public class XSSFHyperlinkRecord { | |||||
private final CellRangeAddress cellRangeAddress; | |||||
private final String relId; | |||||
private String location; | |||||
private String toolTip; | |||||
private String display; | |||||
XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) { | |||||
this.cellRangeAddress = cellRangeAddress; | |||||
this.relId = relId; | |||||
this.location = location; | |||||
this.toolTip = toolTip; | |||||
this.display = display; | |||||
} | |||||
void setLocation(String location) { | |||||
this.location = location; | |||||
} | |||||
void setToolTip(String toolTip) { | |||||
this.toolTip = toolTip; | |||||
} | |||||
void setDisplay(String display) { | |||||
this.display = display; | |||||
} | |||||
CellRangeAddress getCellRangeAddress() { | |||||
return cellRangeAddress; | |||||
} | |||||
public String getRelId() { | |||||
return relId; | |||||
} | |||||
public String getLocation() { | |||||
return location; | |||||
} | |||||
public String getToolTip() { | |||||
return toolTip; | |||||
} | |||||
public String getDisplay() { | |||||
return display; | |||||
} | |||||
@Override | |||||
public boolean equals(Object o) { | |||||
if (this == o) return true; | |||||
if (o == null || getClass() != o.getClass()) return false; | |||||
XSSFHyperlinkRecord that = (XSSFHyperlinkRecord) o; | |||||
if (cellRangeAddress != null ? !cellRangeAddress.equals(that.cellRangeAddress) : that.cellRangeAddress != null) | |||||
return false; | |||||
if (relId != null ? !relId.equals(that.relId) : that.relId != null) return false; | |||||
if (location != null ? !location.equals(that.location) : that.location != null) return false; | |||||
if (toolTip != null ? !toolTip.equals(that.toolTip) : that.toolTip != null) return false; | |||||
return display != null ? display.equals(that.display) : that.display == null; | |||||
} | |||||
@Override | |||||
public int hashCode() { | |||||
int result = cellRangeAddress != null ? cellRangeAddress.hashCode() : 0; | |||||
result = 31 * result + (relId != null ? relId.hashCode() : 0); | |||||
result = 31 * result + (location != null ? location.hashCode() : 0); | |||||
result = 31 * result + (toolTip != null ? toolTip.hashCode() : 0); | |||||
result = 31 * result + (display != null ? display.hashCode() : 0); | |||||
return result; | |||||
} | |||||
@Override | |||||
public String toString() { | |||||
return "XSSFHyperlinkRecord{" + | |||||
"cellRangeAddress=" + cellRangeAddress + | |||||
", relId='" + relId + '\'' + | |||||
", location='" + location + '\'' + | |||||
", toolTip='" + toolTip + '\'' + | |||||
", display='" + display + '\'' + | |||||
'}'; | |||||
} | |||||
} |
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> | |||||
<!-- | |||||
==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== | |||||
--> | |||||
<html> | |||||
<head> | |||||
</head> | |||||
<body bgcolor="white"> | |||||
<p>The org.apache.poi.xssf.binary package includes necessary underlying components | |||||
for streaming/read-only processing of xlsb files. | |||||
</p> | |||||
<p> | |||||
POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read them with XSSFBReader
in the org.apache.poi.xssf.eventusermodel package.
</p> | |||||
<p> | |||||
This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes | |||||
have been marked @Internal and the API is subject to change. | |||||
</p> | |||||
<h2>Related Documentation</h2> | |||||
For overviews, tutorials, examples, guides, and tool documentation, please see: | |||||
<ul> | |||||
<li><a href="http://poi.apache.org">Apache POI Project</a> | |||||
</ul> | |||||
</body> | |||||
</html> |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.eventusermodel; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.Iterator; | |||||
import java.util.LinkedList; | |||||
import java.util.List; | |||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.apache.poi.openxml4j.opc.PackagePartName; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationship; | |||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; | |||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | |||||
import org.apache.poi.util.LittleEndian; | |||||
import org.apache.poi.xssf.binary.XSSFBCommentsTable; | |||||
import org.apache.poi.xssf.binary.XSSFBParseException; | |||||
import org.apache.poi.xssf.binary.XSSFBParser; | |||||
import org.apache.poi.xssf.binary.XSSFBRecordType; | |||||
import org.apache.poi.xssf.binary.XSSFBRelation; | |||||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||||
import org.apache.poi.xssf.binary.XSSFBUtils; | |||||
import org.apache.poi.xssf.model.CommentsTable; | |||||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||||
/** | |||||
* Reader for xlsb files. | |||||
*/ | |||||
public class XSSFBReader extends XSSFReader { | |||||
/** | |||||
* Creates a new XSSFReader, for the given package | |||||
* | |||||
* @param pkg opc package | |||||
*/ | |||||
public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException { | |||||
super(pkg); | |||||
} | |||||
/** | |||||
* Returns an Iterator which will let you get at all the | |||||
* different Sheets in turn. | |||||
* Each sheet's InputStream is only opened when fetched | |||||
* from the Iterator. It's up to you to close the | |||||
* InputStreams when done with each one. | |||||
*/ | |||||
@Override | |||||
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException { | |||||
return new SheetIterator(workbookPart); | |||||
} | |||||
public XSSFBStylesTable getXSSFBStylesTable() throws IOException { | |||||
ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType()); | |||||
if(parts.size() == 0) return null; | |||||
// Create the Styles Table, and associate the Themes if present | |||||
return new XSSFBStylesTable(parts.get(0).getInputStream()); | |||||
} | |||||
public static class SheetIterator extends XSSFReader.SheetIterator { | |||||
/** | |||||
* Construct a new SheetIterator | |||||
* | |||||
* @param wb package part holding workbook.xml | |||||
*/ | |||||
private SheetIterator(PackagePart wb) throws IOException { | |||||
super(wb); | |||||
} | |||||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException { | |||||
SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream()); | |||||
sheetRefLoader.parse(); | |||||
return sheetRefLoader.getSheets().iterator(); | |||||
} | |||||
/** | |||||
* Not supported by XSSFBReader's SheetIterator. | |||||
* Please use {@link #getXSSFBSheetComments()} instead. | |||||
* @return nothing, always throws IllegalArgumentException! | |||||
*/ | |||||
@Override | |||||
public CommentsTable getSheetComments() { | |||||
throw new IllegalArgumentException("Please use getXSSFBSheetComments"); | |||||
} | |||||
public XSSFBCommentsTable getXSSFBSheetComments() { | |||||
PackagePart sheetPkg = getSheetPart(); | |||||
// Do we have a comments relationship? (Only ever one if so) | |||||
try { | |||||
PackageRelationshipCollection commentsList = | |||||
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation()); | |||||
if (commentsList.size() > 0) { | |||||
PackageRelationship comments = commentsList.getRelationship(0); | |||||
if (comments == null || comments.getTargetURI() == null) { | |||||
return null; | |||||
} | |||||
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI()); | |||||
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName); | |||||
return new XSSFBCommentsTable(commentsPart.getInputStream()); | |||||
} | |||||
} catch (InvalidFormatException e) { | |||||
return null; | |||||
} catch (IOException e) { | |||||
return null; | |||||
} | |||||
return null; | |||||
} | |||||
} | |||||
private static class SheetRefLoader extends XSSFBParser { | |||||
List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>(); | |||||
private SheetRefLoader(InputStream is) { | |||||
super(is); | |||||
} | |||||
@Override | |||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException { | |||||
if (recordType == XSSFBRecordType.BrtBundleSh.getId()) { | |||||
addWorksheet(data); | |||||
} | |||||
} | |||||
private void addWorksheet(byte[] data) { | |||||
int offset = 0; | |||||
//this is the sheet state #2.5.142 | |||||
long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||||
long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE; | |||||
//according to #2.4.304 | |||||
if (iTabID < 1 || iTabID > 0x0000FFFFL) { | |||||
throw new XSSFBParseException("table id out of range: "+iTabID); | |||||
} | |||||
StringBuilder sb = new StringBuilder(); | |||||
offset += XSSFBUtils.readXLWideString(data, offset, sb); | |||||
String relId = sb.toString(); | |||||
sb.setLength(0); | |||||
XSSFBUtils.readXLWideString(data, offset, sb); | |||||
String name = sb.toString(); | |||||
if (relId != null && relId.trim().length() > 0) { | |||||
sheets.add(new XSSFSheetRef(relId, name)); | |||||
} | |||||
} | |||||
List<XSSFSheetRef> getSheets() { | |||||
return sheets; | |||||
} | |||||
} | |||||
} |
==================================================================== */ | ==================================================================== */ | ||||
package org.apache.poi.xssf.eventusermodel; | package org.apache.poi.xssf.eventusermodel; | ||||
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; | |||||
import javax.xml.parsers.ParserConfigurationException; | |||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.InputStream; | import java.io.InputStream; | ||||
import java.util.ArrayList; | import java.util.ArrayList; | ||||
import java.util.Collections; | |||||
import java.util.HashMap; | import java.util.HashMap; | ||||
import java.util.Iterator; | import java.util.Iterator; | ||||
import java.util.LinkedList; | import java.util.LinkedList; | ||||
import java.util.List; | import java.util.List; | ||||
import java.util.Locale; | |||||
import java.util.Map; | import java.util.Map; | ||||
import org.apache.poi.POIXMLException; | import org.apache.poi.POIXMLException; | ||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper; | import org.apache.poi.openxml4j.opc.PackagingURIHelper; | ||||
import org.apache.poi.util.POILogFactory; | import org.apache.poi.util.POILogFactory; | ||||
import org.apache.poi.util.POILogger; | import org.apache.poi.util.POILogger; | ||||
import org.apache.poi.util.SAXHelper; | |||||
import org.apache.poi.xssf.model.CommentsTable; | import org.apache.poi.xssf.model.CommentsTable; | ||||
import org.apache.poi.xssf.model.SharedStringsTable; | import org.apache.poi.xssf.model.SharedStringsTable; | ||||
import org.apache.poi.xssf.model.StylesTable; | import org.apache.poi.xssf.model.StylesTable; | ||||
import org.apache.poi.xssf.usermodel.XSSFRelation; | import org.apache.poi.xssf.usermodel.XSSFRelation; | ||||
import org.apache.poi.xssf.usermodel.XSSFShape; | import org.apache.poi.xssf.usermodel.XSSFShape; | ||||
import org.apache.xmlbeans.XmlException; | import org.apache.xmlbeans.XmlException; | ||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; | |||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; | |||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; | |||||
import org.xml.sax.Attributes; | |||||
import org.xml.sax.InputSource; | |||||
import org.xml.sax.SAXException; | |||||
import org.xml.sax.XMLReader; | |||||
import org.xml.sax.helpers.DefaultHandler; | |||||
/** | /** | ||||
* This class makes it easy to get at individual parts | * This class makes it easy to get at individual parts | ||||
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class); | private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class); | ||||
private OPCPackage pkg; | |||||
private PackagePart workbookPart; | |||||
protected OPCPackage pkg; | |||||
protected PackagePart workbookPart; | |||||
/** | /** | ||||
* Creates a new XSSFReader, for the given package | * Creates a new XSSFReader, for the given package | ||||
private final Map<String, PackagePart> sheetMap; | private final Map<String, PackagePart> sheetMap; | ||||
/** | /** | ||||
* Current CTSheet bean | |||||
* Current sheet reference | |||||
*/ | */ | ||||
private CTSheet ctSheet; | |||||
XSSFSheetRef xssfSheetRef; | |||||
/** | /** | ||||
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order. | * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order. | ||||
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, | * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order, | ||||
* i.e. as they are stored in the underlying package | * i.e. as they are stored in the underlying package | ||||
*/ | */ | ||||
private final Iterator<CTSheet> sheetIterator; | |||||
final Iterator<XSSFSheetRef> sheetIterator; | |||||
/** | /** | ||||
* Construct a new SheetIterator | * Construct a new SheetIterator | ||||
* | * | ||||
* @param wb package part holding workbook.xml | * @param wb package part holding workbook.xml | ||||
*/ | */ | ||||
private SheetIterator(PackagePart wb) throws IOException { | |||||
SheetIterator(PackagePart wb) throws IOException { | |||||
/** | /** | ||||
* The order of sheets is defined by the order of CTSheet elements in workbook.xml | * The order of sheets is defined by the order of CTSheet elements in workbook.xml | ||||
sheetMap.put(rel.getId(), pkg.getPart(relName)); | sheetMap.put(rel.getId(), pkg.getPart(relName)); | ||||
} | } | ||||
} | } | ||||
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator | |||||
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search | |||||
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook(); | |||||
List<CTSheet> validSheets = new ArrayList<CTSheet>(); | |||||
for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) { | |||||
//if there's no relationship id, silently skip the sheet | |||||
String sheetId = ctSheet.getId(); | |||||
if (sheetId != null && sheetId.length() > 0) { | |||||
validSheets.add(ctSheet); | |||||
} | |||||
} | |||||
sheetIterator = validSheets.iterator(); | |||||
//step 2. Read array of CTSheet elements, wrap it in a LinkedList | |||||
//and construct an iterator | |||||
sheetIterator = createSheetIteratorFromWB(wb); | |||||
} catch (InvalidFormatException e){ | } catch (InvalidFormatException e){ | ||||
throw new POIXMLException(e); | throw new POIXMLException(e); | ||||
} catch (XmlException e){ | |||||
} | |||||
} | |||||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException { | |||||
XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader(); | |||||
XMLReader xmlReader = null; | |||||
try { | |||||
xmlReader = SAXHelper.newXMLReader(); | |||||
} catch (ParserConfigurationException e) { | |||||
throw new POIXMLException(e); | |||||
} catch (SAXException e) { | |||||
throw new POIXMLException(e); | throw new POIXMLException(e); | ||||
} | } | ||||
xmlReader.setContentHandler(xmlSheetRefReader); | |||||
try { | |||||
xmlReader.parse(new InputSource(wb.getInputStream())); | |||||
} catch (SAXException e) { | |||||
throw new POIXMLException(e); | |||||
} | |||||
List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>(); | |||||
for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) { | |||||
//if there's no relationship id, silently skip the sheet | |||||
String sheetId = xssfSheetRef.getId(); | |||||
if (sheetId != null && sheetId.length() > 0) { | |||||
validSheets.add(xssfSheetRef); | |||||
} | |||||
} | |||||
return validSheets.iterator(); | |||||
} | } | ||||
/** | /** | ||||
* Returns <tt>true</tt> if the iteration has more elements. | * Returns <tt>true</tt> if the iteration has more elements. | ||||
* | * | ||||
*/ | */ | ||||
@Override | @Override | ||||
public InputStream next() { | public InputStream next() { | ||||
ctSheet = sheetIterator.next(); | |||||
xssfSheetRef = sheetIterator.next(); | |||||
String sheetId = ctSheet.getId(); | |||||
String sheetId = xssfSheetRef.getId(); | |||||
try { | try { | ||||
PackagePart sheetPkg = sheetMap.get(sheetId); | PackagePart sheetPkg = sheetMap.get(sheetId); | ||||
return sheetPkg.getInputStream(); | return sheetPkg.getInputStream(); | ||||
* @return name of the current sheet | * @return name of the current sheet | ||||
*/ | */ | ||||
public String getSheetName() { | public String getSheetName() { | ||||
return ctSheet.getName(); | |||||
return xssfSheetRef.getName(); | |||||
} | } | ||||
/** | /** | ||||
} | } | ||||
public PackagePart getSheetPart() { | public PackagePart getSheetPart() { | ||||
String sheetId = ctSheet.getId(); | |||||
String sheetId = xssfSheetRef.getId(); | |||||
return sheetMap.get(sheetId); | return sheetMap.get(sheetId); | ||||
} | } | ||||
throw new IllegalStateException("Not supported"); | throw new IllegalStateException("Not supported"); | ||||
} | } | ||||
} | } | ||||
protected final static class XSSFSheetRef { | |||||
//do we need to store sheetId, too? | |||||
private final String id; | |||||
private final String name; | |||||
public XSSFSheetRef(String id, String name) { | |||||
this.id = id; | |||||
this.name = name; | |||||
} | |||||
public String getId() { | |||||
return id; | |||||
} | |||||
public String getName() { | |||||
return name; | |||||
} | |||||
} | |||||
//scrapes sheet reference info and order from workbook.xml | |||||
private static class XMLSheetRefReader extends DefaultHandler { | |||||
private final static String SHEET = "sheet"; | |||||
private final static String ID = "id"; | |||||
private final static String NAME = "name"; | |||||
private final List<XSSFSheetRef> sheetRefs = new LinkedList(); | |||||
@Override | |||||
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { | |||||
if (localName.toLowerCase(Locale.US).equals(SHEET)) { | |||||
String name = null; | |||||
String id = null; | |||||
for (int i = 0; i < attrs.getLength(); i++) { | |||||
if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) { | |||||
name = attrs.getValue(i); | |||||
} else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) { | |||||
id = attrs.getValue(i); | |||||
} | |||||
sheetRefs.add(new XSSFSheetRef(id, name)); | |||||
} | |||||
} | |||||
} | |||||
List<XSSFSheetRef> getSheetRefs() { | |||||
return Collections.unmodifiableList(sheetRefs); | |||||
} | |||||
} | |||||
} | } |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.extractor; | |||||
import java.io.IOException; | |||||
import java.io.InputStream; | |||||
import org.apache.poi.POIXMLTextExtractor; | |||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.ss.usermodel.DataFormatter; | |||||
import org.apache.poi.xssf.binary.XSSFBCommentsTable; | |||||
import org.apache.poi.xssf.binary.XSSFBHyperlinksTable; | |||||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; | |||||
import org.apache.poi.xssf.binary.XSSFBSheetHandler; | |||||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||||
import org.apache.poi.xssf.eventusermodel.XSSFBReader; | |||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; | |||||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||||
import org.apache.xmlbeans.XmlException; | |||||
import org.xml.sax.SAXException; | |||||
/** | |||||
* Implementation of a text extractor or xlsb Excel | |||||
* files that uses SAX-like binary parsing. | |||||
*/ | |||||
public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor | |||||
implements org.apache.poi.ss.extractor.ExcelExtractor { | |||||
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { | |||||
XSSFRelation.XLSB_BINARY_WORKBOOK | |||||
}; | |||||
private boolean handleHyperlinksInCells = false; | |||||
/**
 * Creates an extractor for the file at the given path; all work is
 * delegated to the superclass constructor.
 *
 * @param path path of the file to extract text from
 */
public XSSFBEventBasedExcelExtractor(String path)
        throws XmlException, OpenXML4JException, IOException {
    super(path);
}
/**
 * Creates an extractor for an already opened package; all work is
 * delegated to the superclass constructor.
 *
 * @param container package to extract text from
 */
public XSSFBEventBasedExcelExtractor(OPCPackage container)
        throws XmlException, OpenXML4JException, IOException {
    super(container);
}
public static void main(String[] args) throws Exception { | |||||
if (args.length < 1) { | |||||
System.err.println("Use:"); | |||||
System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>"); | |||||
System.exit(1); | |||||
} | |||||
POIXMLTextExtractor extractor = | |||||
new XSSFBEventBasedExcelExtractor(args[0]); | |||||
System.out.println(extractor.getText()); | |||||
extractor.close(); | |||||
} | |||||
public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) { | |||||
this.handleHyperlinksInCells = handleHyperlinksInCells; | |||||
} | |||||
/** | |||||
* Should we return the formula itself, and not | |||||
* the result it produces? Default is false | |||||
* This is currently unsupported for xssfb | |||||
*/ | |||||
@Override | |||||
public void setFormulasNotResults(boolean formulasNotResults) { | |||||
throw new IllegalArgumentException("Not currently supported"); | |||||
} | |||||
/** | |||||
* Processes the given sheet | |||||
*/ | |||||
public void processSheet( | |||||
SheetContentsHandler sheetContentsExtractor, | |||||
XSSFBStylesTable styles, | |||||
XSSFBCommentsTable comments, | |||||
XSSFBSharedStringsTable strings, | |||||
InputStream sheetInputStream) | |||||
throws IOException, SAXException { | |||||
DataFormatter formatter; | |||||
if (locale == null) { | |||||
formatter = new DataFormatter(); | |||||
} else { | |||||
formatter = new DataFormatter(locale); | |||||
} | |||||
XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler( | |||||
sheetInputStream, | |||||
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults | |||||
); | |||||
xssfbSheetHandler.parse(); | |||||
} | |||||
/** | |||||
* Processes the file and returns the text | |||||
*/ | |||||
public String getText() { | |||||
try { | |||||
XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container); | |||||
XSSFBReader xssfbReader = new XSSFBReader(container); | |||||
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable(); | |||||
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData(); | |||||
StringBuffer text = new StringBuffer(); | |||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor(); | |||||
XSSFBHyperlinksTable hyperlinksTable = null; | |||||
while (iter.hasNext()) { | |||||
InputStream stream = iter.next(); | |||||
if (includeSheetNames) { | |||||
text.append(iter.getSheetName()); | |||||
text.append('\n'); | |||||
} | |||||
if (handleHyperlinksInCells) { | |||||
hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart()); | |||||
} | |||||
XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null; | |||||
processSheet(sheetExtractor, styles, comments, strings, stream); | |||||
if (includeHeadersFooters) { | |||||
sheetExtractor.appendHeaderText(text); | |||||
} | |||||
sheetExtractor.appendCellText(text); | |||||
if (includeTextBoxes) { | |||||
processShapes(iter.getShapes(), text); | |||||
} | |||||
if (includeHeadersFooters) { | |||||
sheetExtractor.appendFooterText(text); | |||||
} | |||||
sheetExtractor.reset(); | |||||
stream.close(); | |||||
} | |||||
return text.toString(); | |||||
} catch (IOException e) { | |||||
System.err.println(e); | |||||
return null; | |||||
} catch (SAXException se) { | |||||
System.err.println(se); | |||||
return null; | |||||
} catch (OpenXML4JException o4je) { | |||||
System.err.println(o4je); | |||||
return null; | |||||
} | |||||
} | |||||
} |
*/ | */ | ||||
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor | ||||
implements org.apache.poi.ss.extractor.ExcelExtractor { | implements org.apache.poi.ss.extractor.ExcelExtractor { | ||||
private OPCPackage container; | |||||
OPCPackage container; | |||||
private POIXMLProperties properties; | private POIXMLProperties properties; | ||||
private Locale locale; | |||||
private boolean includeTextBoxes = true; | |||||
private boolean includeSheetNames = true; | |||||
private boolean includeCellComments = false; | |||||
private boolean includeHeadersFooters = true; | |||||
private boolean formulasNotResults = false; | |||||
Locale locale; | |||||
boolean includeTextBoxes = true; | |||||
boolean includeSheetNames = true; | |||||
boolean includeCellComments = false; | |||||
boolean includeHeadersFooters = true; | |||||
boolean formulasNotResults = false; | |||||
private boolean concatenatePhoneticRuns = true; | private boolean concatenatePhoneticRuns = true; | ||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { | public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { | ||||
} | } | ||||
} | } | ||||
private void processShapes(List<XSSFShape> shapes, StringBuffer text) { | |||||
void processShapes(List<XSSFShape> shapes, StringBuffer text) { | |||||
if (shapes == null){ | if (shapes == null){ | ||||
return; | return; | ||||
} | } | ||||
* @see XSSFExcelExtractor#getText() | * @see XSSFExcelExtractor#getText() | ||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | ||||
*/ | */ | ||||
private void appendHeaderText(StringBuffer buffer) { | |||||
void appendHeaderText(StringBuffer buffer) { | |||||
appendHeaderFooterText(buffer, "firstHeader"); | appendHeaderFooterText(buffer, "firstHeader"); | ||||
appendHeaderFooterText(buffer, "oddHeader"); | appendHeaderFooterText(buffer, "oddHeader"); | ||||
appendHeaderFooterText(buffer, "evenHeader"); | appendHeaderFooterText(buffer, "evenHeader"); | ||||
* @see XSSFExcelExtractor#getText() | * @see XSSFExcelExtractor#getText() | ||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter) | ||||
*/ | */ | ||||
private void appendFooterText(StringBuffer buffer) { | |||||
void appendFooterText(StringBuffer buffer) { | |||||
// append the text for each footer type in the same order | // append the text for each footer type in the same order | ||||
// they are appended in XSSFExcelExtractor | // they are appended in XSSFExcelExtractor | ||||
appendHeaderFooterText(buffer, "firstFooter"); | appendHeaderFooterText(buffer, "firstFooter"); | ||||
/** | /** | ||||
* Append the cell contents we have collected. | * Append the cell contents we have collected. | ||||
*/ | */ | ||||
private void appendCellText(StringBuffer buffer) { | |||||
void appendCellText(StringBuffer buffer) { | |||||
checkMaxTextSize(buffer, output.toString()); | checkMaxTextSize(buffer, output.toString()); | ||||
buffer.append(output); | buffer.append(output); | ||||
} | } | ||||
/** | /** | ||||
* Reset this <code>SheetTextExtractor</code> for the next sheet. | * Reset this <code>SheetTextExtractor</code> for the next sheet. | ||||
*/ | */ | ||||
private void reset() { | |||||
void reset() { | |||||
output.setLength(0); | output.setLength(0); | ||||
firstCellOfRow = true; | firstCellOfRow = true; | ||||
if (headerFooterMap != null) { | if (headerFooterMap != null) { |
private static File xlsxStrict; | private static File xlsxStrict; | ||||
private static File xltx; | private static File xltx; | ||||
private static File xlsEmb; | private static File xlsEmb; | ||||
private static File xlsb; | |||||
private static File doc; | private static File doc; | ||||
private static File doc6; | private static File doc6; | ||||
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); | xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx"); | ||||
xltx = getFileAndCheck(ssTests, "test.xltx"); | xltx = getFileAndCheck(ssTests, "test.xltx"); | ||||
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); | xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls"); | ||||
xlsb = getFileAndCheck(ssTests, "testVarious.xlsb"); | |||||
POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); | POIDataSamples wpTests = POIDataSamples.getDocumentInstance(); | ||||
doc = getFileAndCheck(wpTests, "SampleDoc.doc"); | doc = getFileAndCheck(wpTests, "SampleDoc.doc"); | ||||
); | ); | ||||
extractor.close(); | extractor.close(); | ||||
extractor = ExtractorFactory.createExtractor(xlsb); | |||||
assertTrue( | |||||
extractor.getText().contains("test") | |||||
); | |||||
extractor.close(); | |||||
extractor = ExtractorFactory.createExtractor(xltx); | extractor = ExtractorFactory.createExtractor(xltx); | ||||
assertTrue( | assertTrue( | ||||
extractor.getText().contains("test") | extractor.getText().contains("test") |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import static org.junit.Assert.assertEquals; | |||||
import java.util.List; | |||||
import java.util.regex.Pattern; | |||||
import org.apache.poi.POIDataSamples; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.openxml4j.opc.PackagePart; | |||||
import org.junit.Test; | |||||
public class TestXSSFBSharedStringsTable { | |||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||||
@Test | |||||
public void testBasic() throws Exception { | |||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb")); | |||||
List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin")); | |||||
assertEquals(1, parts.size()); | |||||
XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0)); | |||||
List<String> strings = rtbl.getItems(); | |||||
assertEquals(49, strings.size()); | |||||
assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0)); | |||||
assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3)); | |||||
assertEquals(55, rtbl.getCount()); | |||||
assertEquals(49, rtbl.getUniqueCount()); | |||||
//TODO: add in tests for phonetic runs | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.binary; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertNotNull; | |||||
import java.util.List; | |||||
import org.apache.poi.POIDataSamples; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.ss.util.CellAddress; | |||||
import org.apache.poi.xssf.eventusermodel.XSSFBReader; | |||||
import org.apache.poi.xssf.eventusermodel.XSSFReader; | |||||
import org.junit.Test; | |||||
public class TestXSSFBSheetHyperlinkManager { | |||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||||
@Test | |||||
public void testBasic() throws Exception { | |||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb")); | |||||
XSSFBReader reader = new XSSFBReader(pkg); | |||||
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData(); | |||||
it.next(); | |||||
XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart()); | |||||
List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0)); | |||||
assertNotNull(records); | |||||
assertEquals(1, records.size()); | |||||
XSSFHyperlinkRecord record = records.get(0); | |||||
assertEquals("http://tika.apache.org/", record.getLocation()); | |||||
assertEquals("rId2", record.getRelId()); | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.eventusermodel; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertNotNull; | |||||
import static org.junit.Assert.fail; | |||||
import java.io.InputStream; | |||||
import java.util.ArrayList; | |||||
import java.util.List; | |||||
import org.apache.poi.POIDataSamples; | |||||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||||
import org.apache.poi.ss.usermodel.DataFormatter; | |||||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; | |||||
import org.apache.poi.xssf.binary.XSSFBSheetHandler; | |||||
import org.apache.poi.xssf.binary.XSSFBStylesTable; | |||||
import org.apache.poi.xssf.usermodel.XSSFComment; | |||||
import org.junit.Test; | |||||
public class TestXSSFBReader { | |||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance(); | |||||
@Test | |||||
public void testBasic() throws Exception { | |||||
List<String> sheetTexts = getSheets("testVarious.xlsb"); | |||||
assertEquals(1, sheetTexts.size()); | |||||
String xsxml = sheetTexts.get(0); | |||||
assertContains("This is a string", xsxml); | |||||
assertContains("<td ref=\"B2\">13</td>", xsxml); | |||||
assertContains("<td ref=\"B3\">13.12112313</td>", xsxml); | |||||
assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml); | |||||
assertContains("<td ref=\"B5\">20%</td>", xsxml); | |||||
assertContains("<td ref=\"B6\">13.12</td>", xsxml); | |||||
assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml); | |||||
assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml); | |||||
assertContains("46/1963", xsxml);//custom format 1 | |||||
assertContains("3/128", xsxml);//custom format 2 | |||||
assertContains("<tr num=\"7>\n" + | |||||
"\t<td ref=\"A8\">longer int</td>\n" + | |||||
"\t<td ref=\"B8\">1.23457E+15</td>\n" + | |||||
"\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||||
"test comment2</span></td>\n" + | |||||
"</tr num=\"7>", xsxml); | |||||
assertContains("<tr num=\"34>\n" + | |||||
"\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||||
"comment6 actually in cell</span></td>\n" + | |||||
"</tr num=\"34>", xsxml); | |||||
assertContains("<tr num=\"64>\n" + | |||||
"\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||||
"comment7 end of file</span></td>\n" + | |||||
"</tr num=\"64>", xsxml); | |||||
assertContains("<tr num=\"65>\n" + | |||||
"\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||||
"comment8 end of file</span></td>\n" + | |||||
"</tr num=\"65>", xsxml); | |||||
assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml); | |||||
assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml); | |||||
assertContains( | |||||
"<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>", | |||||
xsxml); | |||||
assertContains( | |||||
"<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>", | |||||
xsxml); | |||||
assertContains( | |||||
"<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>", | |||||
xsxml); | |||||
assertContains( | |||||
"<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>", | |||||
xsxml); | |||||
} | |||||
@Test | |||||
public void testComments() throws Exception { | |||||
List<String> sheetTexts = getSheets("comments.xlsb"); | |||||
String xsxml = sheetTexts.get(0); | |||||
assertContains( | |||||
"<tr num=\"0>\n" + | |||||
"\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" + | |||||
"\t<td ref=\"B1\">row1</td>\n" + | |||||
"</tr num=\"0>", xsxml); | |||||
assertContains( | |||||
"<tr num=\"1>\n" + | |||||
"\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + | |||||
"comment row2 (index1)</span></td>\n" + | |||||
"</tr num=\"1>", | |||||
xsxml); | |||||
assertContains("<tr num=\"2>\n" + | |||||
"\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" + | |||||
"\t<td ref=\"B3\">row3</td>\n", xsxml); | |||||
assertContains("<tr num=\"3>\n" + | |||||
"\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" + | |||||
"\t<td ref=\"B4\">row4</td>\n" + | |||||
"</tr num=\"3></sheet>", xsxml); | |||||
} | |||||
private List<String> getSheets(String testFileName) throws Exception { | |||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName)); | |||||
List<String> sheetTexts = new ArrayList<String>(); | |||||
XSSFBReader r = new XSSFBReader(pkg); | |||||
// assertNotNull(r.getWorkbookData()); | |||||
// assertNotNull(r.getSharedStringsData()); | |||||
assertNotNull(r.getXSSFBStylesTable()); | |||||
XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg); | |||||
XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable(); | |||||
XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData(); | |||||
while (it.hasNext()) { | |||||
InputStream is = it.next(); | |||||
String name = it.getSheetName(); | |||||
TestSheetHandler testSheetHandler = new TestSheetHandler(); | |||||
testSheetHandler.startSheet(name); | |||||
XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is, | |||||
xssfbStylesTable, | |||||
it.getXSSFBSheetComments(), | |||||
sst, testSheetHandler, | |||||
new DataFormatter(), | |||||
false); | |||||
sheetHandler.parse(); | |||||
testSheetHandler.endSheet(); | |||||
sheetTexts.add(testSheetHandler.toString()); | |||||
} | |||||
return sheetTexts; | |||||
} | |||||
//This converts all [\r\n\t]+ to " " | |||||
private void assertContains(String needle, String haystack) { | |||||
needle = needle.replaceAll("[\r\n\t]+", " "); | |||||
haystack = haystack.replaceAll("[\r\n\t]+", " "); | |||||
if (haystack.indexOf(needle) < 0) { | |||||
fail("couldn't find >"+needle+"< in: "+haystack ); | |||||
} | |||||
} | |||||
@Test | |||||
public void testDate() throws Exception { | |||||
List<String> sheets = getSheets("date.xlsb"); | |||||
assertEquals(1, sheets.size()); | |||||
assertContains("1/12/13", sheets.get(0)); | |||||
} | |||||
private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler { | |||||
private final StringBuilder sb = new StringBuilder(); | |||||
public void startSheet(String sheetName) { | |||||
sb.append("<sheet name=\"").append(sheetName).append(">"); | |||||
} | |||||
public void endSheet(){ | |||||
sb.append("</sheet>"); | |||||
} | |||||
@Override | |||||
public void startRow(int rowNum) { | |||||
sb.append("\n<tr num=\"").append(rowNum).append(">"); | |||||
} | |||||
@Override | |||||
public void endRow(int rowNum) { | |||||
sb.append("\n</tr num=\"").append(rowNum).append(">"); | |||||
} | |||||
@Override | |||||
public void cell(String cellReference, String formattedValue, XSSFComment comment) { | |||||
formattedValue = (formattedValue == null) ? "" : formattedValue; | |||||
if (comment == null) { | |||||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>"); | |||||
} else { | |||||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">") | |||||
.append(formattedValue) | |||||
.append("<span type=\"comment\" author=\"") | |||||
.append(comment.getAuthor()).append("\">") | |||||
.append(comment.getString().toString().trim()).append("</span>") | |||||
.append("</td>"); | |||||
} | |||||
} | |||||
@Override | |||||
public void headerFooter(String text, boolean isHeader, String tagName) { | |||||
if (isHeader) { | |||||
sb.append("<header tagName=\""+tagName+"\">"+text+"</header>"); | |||||
} else { | |||||
sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>"); | |||||
} | |||||
} | |||||
@Override | |||||
public String toString() { | |||||
return sb.toString(); | |||||
} | |||||
} | |||||
} |
/* ==================================================================== | |||||
Licensed to the Apache Software Foundation (ASF) under one or more | |||||
contributor license agreements. See the NOTICE file distributed with | |||||
this work for additional information regarding copyright ownership. | |||||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||||
(the "License"); you may not use this file except in compliance with | |||||
the License. You may obtain a copy of the License at | |||||
http://www.apache.org/licenses/LICENSE-2.0 | |||||
Unless required by applicable law or agreed to in writing, software | |||||
distributed under the License is distributed on an "AS IS" BASIS, | |||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
See the License for the specific language governing permissions and | |||||
limitations under the License. | |||||
==================================================================== */ | |||||
package org.apache.poi.xssf.extractor; | |||||
import static org.junit.Assert.assertEquals; | |||||
import static org.junit.Assert.assertTrue; | |||||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||||
import org.junit.Test; | |||||
/** | |||||
* Tests for {@link XSSFBEventBasedExcelExtractor} | |||||
*/ | |||||
public class TestXSSFBEventBasedExcelExtractor { | |||||
protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { | |||||
return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples. | |||||
openSamplePackage(sampleName)); | |||||
} | |||||
/** | |||||
* Get text out of the simple file | |||||
*/ | |||||
@Test | |||||
public void testGetSimpleText() throws Exception { | |||||
// a very simple file | |||||
XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb"); | |||||
extractor.setIncludeCellComments(true); | |||||
extractor.getText(); | |||||
String text = extractor.getText(); | |||||
assertTrue(text.length() > 0); | |||||
// Check sheet names | |||||
assertTrue(text.startsWith("Sheet1")); | |||||
assertTrue(text.endsWith("Sheet3\n")); | |||||
// Now without, will have text | |||||
extractor.setIncludeSheetNames(false); | |||||
text = extractor.getText(); | |||||
String CHUNK1 = | |||||
"Lorem\t111\n" + | |||||
"ipsum\t222\n" + | |||||
"dolor\t333\n" + | |||||
"sit\t444\n" + | |||||
"amet\t555\n" + | |||||
"consectetuer\t666\n" + | |||||
"adipiscing\t777\n" + | |||||
"elit\t888\n" + | |||||
"Nunc\t999\n"; | |||||
String CHUNK2 = | |||||
"The quick brown fox jumps over the lazy dog\n" + | |||||
"hello, xssf hello, xssf\n" + | |||||
"hello, xssf hello, xssf\n" + | |||||
"hello, xssf hello, xssf\n" + | |||||
"hello, xssf hello, xssf\n"; | |||||
assertEquals( | |||||
CHUNK1 + | |||||
"at\t4995\n" + | |||||
CHUNK2 | |||||
, text); | |||||
} | |||||
/** | |||||
* Test text extraction from text box using getShapes() | |||||
* | |||||
* @throws Exception | |||||
*/ | |||||
@Test | |||||
public void testShapes() throws Exception { | |||||
XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb"); | |||||
try { | |||||
String text = ooxmlExtractor.getText(); | |||||
assertTrue(text.indexOf("Line 1") > -1); | |||||
assertTrue(text.indexOf("Line 2") > -1); | |||||
assertTrue(text.indexOf("Line 3") > -1); | |||||
} finally { | |||||
ooxmlExtractor.close(); | |||||
} | |||||
} | |||||
} |