git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1511789 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_10_BETA2
@@ -21,6 +21,8 @@ import java.io.InputStream; | |||
import java.util.ArrayList; | |||
import java.util.HashMap; | |||
import java.util.Iterator; | |||
import java.util.LinkedList; | |||
import java.util.List; | |||
import java.util.Map; | |||
import org.apache.poi.POIXMLException; | |||
@@ -37,7 +39,9 @@ import org.apache.poi.xssf.model.CommentsTable; | |||
import org.apache.poi.xssf.model.SharedStringsTable; | |||
import org.apache.poi.xssf.model.StylesTable; | |||
import org.apache.poi.xssf.model.ThemesTable; | |||
import org.apache.poi.xssf.usermodel.XSSFDrawing; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; | |||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; | |||
@@ -273,6 +277,35 @@ public class XSSFReader { | |||
return null; | |||
} | |||
/** | |||
* Returns the shapes associated with this sheet, | |||
* an empty list or null if there is an exception | |||
*/ | |||
public List<XSSFShape> getShapes() { | |||
PackagePart sheetPkg = getSheetPart(); | |||
List<XSSFShape> shapes= new LinkedList<XSSFShape>(); | |||
// Do we have a comments relationship? (Only ever one if so) | |||
try { | |||
PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation()); | |||
for (int i = 0; i < drawingsList.size(); i++){ | |||
PackageRelationship drawings = drawingsList.getRelationship(i); | |||
PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI()); | |||
PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName); | |||
XSSFDrawing drawing = new XSSFDrawing(drawingsPart, drawings); | |||
for (XSSFShape shape : drawing.getShapes()){ | |||
shapes.add(shape); | |||
} | |||
} | |||
} catch (XmlException e){ | |||
return null; | |||
} catch (InvalidFormatException e) { | |||
return null; | |||
} catch (IOException e) { | |||
return null; | |||
} | |||
return shapes; | |||
} | |||
public PackagePart getSheetPart() { | |||
String sheetId = ctSheet.getId(); | |||
return sheetMap.get(sheetId); |
@@ -18,6 +18,7 @@ package org.apache.poi.xssf.extractor; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.List; | |||
import java.util.Locale; | |||
import javax.xml.parsers.ParserConfigurationException; | |||
@@ -37,6 +38,8 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader; | |||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; | |||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; | |||
import org.apache.poi.xssf.model.StylesTable; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.poi.xssf.usermodel.XSSFSimpleShape; | |||
import org.apache.xmlbeans.XmlException; | |||
import org.xml.sax.ContentHandler; | |||
import org.xml.sax.InputSource; | |||
@@ -54,6 +57,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { | |||
private Locale locale; | |||
private boolean includeSheetNames = true; | |||
private boolean formulasNotResults = false; | |||
private boolean includeTextBoxes = true; | |||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { | |||
this(OPCPackage.open(path)); | |||
@@ -89,6 +93,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { | |||
public void setFormulasNotResults(boolean formulasNotResults) { | |||
this.formulasNotResults = formulasNotResults; | |||
} | |||
/** | |||
* Should text from textboxes be included? Default is true | |||
*/ | |||
public void setIncludeTextBoxes(boolean includeTextBoxes) { | |||
this.includeTextBoxes = includeTextBoxes; | |||
} | |||
public void setLocale(Locale locale) { | |||
this.locale = locale; | |||
@@ -175,6 +187,9 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { | |||
text.append('\n'); | |||
} | |||
processSheet(sheetExtractor, styles, strings, stream); | |||
if (includeTextBoxes){ | |||
processShapes(iter.getShapes(), text); | |||
} | |||
stream.close(); | |||
} | |||
@@ -191,7 +206,20 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { | |||
} | |||
} | |||
@Override | |||
private void processShapes(List<XSSFShape> shapes, StringBuffer text) { | |||
if (shapes == null){ | |||
return; | |||
} | |||
for (XSSFShape shape : shapes){ | |||
if (shape instanceof XSSFSimpleShape){ | |||
String sText = ((XSSFSimpleShape)shape).getText(); | |||
if (sText != null && sText.length() > 0){ | |||
text.append(sText).append('\n'); | |||
} | |||
} | |||
} | |||
} | |||
@Override | |||
public void close() throws IOException { | |||
if (container != null) { | |||
container.close(); |
@@ -31,8 +31,11 @@ import org.apache.poi.ss.usermodel.DataFormatter; | |||
import org.apache.poi.ss.usermodel.HeaderFooter; | |||
import org.apache.poi.ss.usermodel.Row; | |||
import org.apache.poi.xssf.usermodel.XSSFCell; | |||
import org.apache.poi.xssf.usermodel.XSSFDrawing; | |||
import org.apache.poi.xssf.usermodel.XSSFRelation; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.poi.xssf.usermodel.XSSFSheet; | |||
import org.apache.poi.xssf.usermodel.XSSFSimpleShape; | |||
import org.apache.poi.xssf.usermodel.XSSFWorkbook; | |||
import org.apache.xmlbeans.XmlException; | |||
@@ -52,6 +55,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apach | |||
private boolean formulasNotResults = false; | |||
private boolean includeCellComments = false; | |||
private boolean includeHeadersFooters = true; | |||
private boolean includeTextBoxes = true; | |||
/** | |||
* @deprecated Use {@link #XSSFExcelExtractor(org.apache.poi.openxml4j.opc.OPCPackage)} instead. | |||
@@ -103,6 +107,13 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apach | |||
public void setIncludeHeadersFooters(boolean includeHeadersFooters) { | |||
this.includeHeadersFooters = includeHeadersFooters; | |||
} | |||
/** | |||
* Should text within textboxes be included? Default is true | |||
* @param includeTextBoxes | |||
*/ | |||
public void setIncludeTextBoxes(boolean includeTextBoxes){ | |||
this.includeTextBoxes = includeTextBoxes; | |||
} | |||
/** | |||
* What Locale should be used for formatting numbers (based | |||
* on the styles applied to the cells) | |||
@@ -180,7 +191,20 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apach | |||
} | |||
text.append("\n"); | |||
} | |||
// add textboxes | |||
if (includeTextBoxes){ | |||
XSSFDrawing drawing = sheet.createDrawingPatriarch(); | |||
for (XSSFShape shape : drawing.getShapes()){ | |||
if (shape instanceof XSSFSimpleShape){ | |||
String boxText = ((XSSFSimpleShape)shape).getText(); | |||
if (boxText.length() > 0){ | |||
text.append(boxText); | |||
text.append('\n'); | |||
} | |||
} | |||
} | |||
} | |||
// Finally footer(s), if present | |||
if(includeHeadersFooters) { | |||
text.append( |
@@ -76,7 +76,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing { | |||
* @param rel the package relationship holding this drawing, | |||
* the relationship type must be http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing | |||
*/ | |||
protected XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException { | |||
public XSSFDrawing(PackagePart part, PackageRelationship rel) throws IOException, XmlException { | |||
super(part, rel); | |||
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS); | |||
//Removing root element |
@@ -19,6 +19,7 @@ package org.apache.poi.xssf.eventusermodel; | |||
import java.io.InputStream; | |||
import java.util.Iterator; | |||
import java.util.List; | |||
import junit.framework.TestCase; | |||
@@ -27,6 +28,8 @@ import org.apache.poi.util.IOUtils; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
import org.apache.poi.xssf.model.CommentsTable; | |||
import org.apache.poi.xssf.usermodel.XSSFRichTextString; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.poi.xssf.usermodel.XSSFSimpleShape; | |||
import org.apache.poi.POIDataSamples; | |||
/** | |||
@@ -164,4 +167,33 @@ public final class TestXSSFReader extends TestCase { | |||
stream.close(); | |||
} | |||
} | |||
/** | |||
* Test text extraction from text box using getShapes() | |||
* @throws Exception | |||
*/ | |||
public void testShapes() throws Exception{ | |||
OPCPackage pkg = XSSFTestDataSamples.openSamplePackage("WithTextBox.xlsx"); | |||
XSSFReader r = new XSSFReader(pkg); | |||
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData(); | |||
StringBuilder sb = new StringBuilder(); | |||
while(it.hasNext()) | |||
{ | |||
it.next(); | |||
List<XSSFShape> shapes = it.getShapes(); | |||
if (shapes != null){ | |||
for (XSSFShape shape : shapes){ | |||
if (shape instanceof XSSFSimpleShape){ | |||
String t = ((XSSFSimpleShape)shape).getText(); | |||
sb.append(t).append('\n'); | |||
} | |||
} | |||
} | |||
} | |||
String text = sb.toString(); | |||
assertTrue(text.indexOf("Line 1") > -1); | |||
assertTrue(text.indexOf("Line 2") > -1); | |||
assertTrue(text.indexOf("Line 3") > -1); | |||
} | |||
} |
@@ -17,6 +17,7 @@ | |||
package org.apache.poi.xssf.extractor; | |||
import java.util.List; | |||
import java.util.regex.Matcher; | |||
import java.util.regex.Pattern; | |||
@@ -25,7 +26,11 @@ import junit.framework.TestCase; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.hssf.extractor.ExcelExtractor; | |||
import org.apache.poi.openxml4j.opc.OPCPackage; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
import org.apache.poi.xssf.eventusermodel.XSSFReader; | |||
import org.apache.poi.xssf.usermodel.XSSFShape; | |||
import org.apache.poi.xssf.usermodel.XSSFSimpleShape; | |||
/** | |||
* Tests for {@link XSSFEventBasedExcelExtractor} | |||
@@ -167,4 +172,19 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
ole2Extractor.close(); | |||
ooxmlExtractor.close(); | |||
} | |||
/** | |||
* Test text extraction from text box using getShapes() | |||
* @throws Exception | |||
*/ | |||
public void testShapes() throws Exception{ | |||
XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsx"); | |||
String text = ooxmlExtractor.getText(); | |||
assertTrue(text.indexOf("Line 1") > -1); | |||
assertTrue(text.indexOf("Line 2") > -1); | |||
assertTrue(text.indexOf("Line 3") > -1); | |||
} | |||
} |
@@ -211,4 +211,16 @@ public class TestXSSFExcelExtractor extends TestCase { | |||
extractor.close(); | |||
} | |||
/** | |||
* Simple test for text box text | |||
* @throws IOException | |||
*/ | |||
public void testTextBoxes() throws IOException { | |||
XSSFExcelExtractor extractor = getExtractor("WithTextBox.xlsx"); | |||
extractor.setFormulasNotResults(true); | |||
String text = extractor.getText(); | |||
assertTrue(text.indexOf("Line 1") > -1); | |||
assertTrue(text.indexOf("Line 2") > -1); | |||
assertTrue(text.indexOf("Line 3") > -1); | |||
} | |||
} |