<li><link href="#Autofit">How to adjust column width to fit the contents</link></li>
<li><link href="#Hyperlinks">Hyperlinks</link></li>
<li><link href="#Validation">Data Validation</link></li>
+ <li><link href="#Embedded">Embedded Objects</link></li>
</ul>
</section>
<section><title>Features</title>
dvConstraint = DVConstraint.createFormulaListConstraint("'Sheet1'!$A$1:$A$3");
</source>
</section>
+ <anchor id="Embedded"/>
+ <section><title>Embedded Objects</title>
+ <p>It is possible to perform more detailed processing of an embedded Excel, Word or PowerPoint document,
+ or to work with any other type of embedded object.</p>
+ <p><strong>HSSF:</strong></p>
+ <source>
+ POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream("excel_with_embeded.xls"));
+ HSSFWorkbook workbook = new HSSFWorkbook(fs);
+ for (HSSFObjectData obj : workbook.getAllEmbeddedObjects()) {
+ //the OLE2 Class Name of the object
+ String oleName = obj.getOLE2ClassName();
+ if (oleName.equals("Worksheet")) {
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();
+ HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(dn, fs, false);
+ //System.out.println(oleName + ": " + embeddedWorkbook.getNumberOfSheets());
+ } else if (oleName.equals("Document")) {
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();
+ HWPFDocument embeddedWordDocument = new HWPFDocument(dn, fs);
+ //System.out.println(oleName + ": " + embeddedWordDocument.getRange().text());
+ } else if (oleName.equals("Presentation")) {
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();
+ SlideShow embeddedPowerPointDocument = new SlideShow(new HSLFSlideShow(dn, fs));
+ //System.out.println(oleName + ": " + embeddedPowerPointDocument.getSlides().length);
+ } else {
+ if(obj.hasDirectoryEntry()){
+ // The DirectoryEntry is a DirectoryNode. Examine its entries to find out what it is
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();
+ for (Iterator entries = dn.getEntries(); entries.hasNext();) {
+ Entry entry = (Entry) entries.next();
+ //System.out.println(oleName + "." + entry.getName());
+ }
+ } else {
+ // There is no DirectoryEntry
+ // Recover the object's data from the HSSFObjectData instance.
+ byte[] objectData = obj.getObjectData();
+ }
+ }
+ }
+ </source>
+ <p><strong>XSSF:</strong></p>
+ <source>
+ XSSFWorkbook workbook = new XSSFWorkbook("excel_with_embeded.xlsx");
+ for (PackagePart pPart : workbook.getAllEmbedds()) {
+ String contentType = pPart.getContentType();
+ // Excel Workbook - binary (OLE2) file format
+ if (contentType.equals("application/vnd.ms-excel")) {
+ HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());
+ }
+ // Excel Workbook - OpenXML file format
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
+ XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage);
+ }
+ // Word Document - binary (OLE2CDF) file format
+ else if (contentType.equals("application/msword")) {
+ HWPFDocument document = new HWPFDocument(pPart.getInputStream());
+ }
+ // Word Document - OpenXML file format
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
+ XWPFDocument document = new XWPFDocument(docPackage);
+ }
+ // PowerPoint Document - binary file format
+ else if (contentType.equals("application/vnd.ms-powerpoint")) {
+ HSLFSlideShow slideShow = new HSLFSlideShow(pPart.getInputStream());
+ }
+ // PowerPoint Document - OpenXML file format
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.presentationml.presentation")) {
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());
+ XSLFSlideShow slideShow = new XSLFSlideShow(docPackage);
+ }
+ // Any other type of embedded object.
+ else {
+ System.out.println("Unknown Embedded Document: " + contentType);
+ InputStream inputStream = pPart.getInputStream();
+ }
+ }
+ </source>
+ </section>
</body>
</document>
<changes>
<release version="3.5-beta7" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">47535 - fixed WordExtractor to tolerate files with empty footnote block</action>
<action dev="POI-DEVELOPERS" type="fix">47517 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
<action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
<action dev="POI-DEVELOPERS" type="add">45555 - Support for extraction of endnotes from docx files</action>
<em>org.apache.poi.hdgf.extractor.VisioTextExtractor</em>, which
will return text for your file.</p>
</section>
+
+ <section><title>Embedded Objects</title>
+ <p>Extractors already exist for Excel, Word, PowerPoint and Visio;
+ if one of these objects is embedded into a worksheet, the ExtractorFactory class can be used to recover an extractor for it.
+ </p>
+ <source>
+ FileInputStream fis = new FileInputStream(inputFile);
+ POIFSFileSystem fileSystem = new POIFSFileSystem(fis);
+ // Firstly, get an extractor for the Workbook
+ POIOLE2TextExtractor oleTextExtractor = ExtractorFactory.createExtractor(fileSystem);
+ // Then an array of extractors for any Excel, Word, PowerPoint
+ // or Visio objects embedded into it.
+ POITextExtractor[] embeddedExtractors = ExtractorFactory.getEmbededDocsTextExtractors(oleTextExtractor);
+ for (POITextExtractor textExtractor : embeddedExtractors) {
+ // If the embedded object was an Excel spreadsheet.
+ if (textExtractor instanceof ExcelExtractor) {
+ ExcelExtractor excelExtractor = (ExcelExtractor) textExtractor;
+ System.out.println(excelExtractor.getText());
+ }
+ // A Word Document
+ else if (textExtractor instanceof WordExtractor) {
+ WordExtractor wordExtractor = (WordExtractor) textExtractor;
+ String[] paragraphText = wordExtractor.getParagraphText();
+ for (String paragraph : paragraphText) {
+ System.out.println(paragraph);
+ }
+ // Display the document's header and footer text
+ System.out.println("Footer text: " + wordExtractor.getFooterText());
+ System.out.println("Header text: " + wordExtractor.getHeaderText());
+ }
+ // PowerPoint Presentation.
+ else if (textExtractor instanceof PowerPointExtractor) {
+ PowerPointExtractor powerPointExtractor = (PowerPointExtractor) textExtractor;
+ System.out.println("Text: " + powerPointExtractor.getText());
+ System.out.println("Notes: " + powerPointExtractor.getNotes());
+ }
+ // Visio Drawing
+ else if (textExtractor instanceof VisioTextExtractor) {
+ VisioTextExtractor visioTextExtractor = (VisioTextExtractor) textExtractor;
+ System.out.println("Text: " + visioTextExtractor.getText());
+ }
+ }
+ </source>
+ </section>
</body>
<footer>
--- /dev/null
+/* ====================================================================\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+==================================================================== */\r
+package org.apache.poi.hssf.usermodel.examples;\r
+\r
+import org.apache.poi.hssf.usermodel.*;\r
+import org.apache.poi.poifs.filesystem.DirectoryNode;\r
+import org.apache.poi.poifs.filesystem.Entry;\r
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;\r
+import org.apache.poi.hwpf.HWPFDocument;\r
+import org.apache.poi.hslf.HSLFSlideShow;\r
+import org.apache.poi.hslf.usermodel.SlideShow;\r
+\r
+import java.io.FileInputStream;\r
+import java.util.Iterator;\r
+\r
+/**\r
+ * Demonstrates how you can extract embedded data from a .xls file.\r
+ * <p>\r
+ * The workbook named by the first command-line argument is opened and every\r
+ * object returned by {@link HSSFWorkbook#getAllEmbeddedObjects()} is visited.\r
+ * Objects whose OLE2 class name is "Worksheet", "Document" or "Presentation"\r
+ * are opened as an HSSF, HWPF or HSLF document respectively; for anything else\r
+ * the object's directory entries are iterated or, if it has no directory\r
+ * entry, its raw object data is read.\r
+ */\r
+public class EmeddedObjects {\r
+ public static void main(String[] args) throws Exception {\r
+ POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(args[0]));\r
+ HSSFWorkbook workbook = new HSSFWorkbook(fs);\r
+ for (HSSFObjectData obj : workbook.getAllEmbeddedObjects()) {\r
+ // The OLE2 class name of the object; it selects which branch below handles it\r
+ String oleName = obj.getOLE2ClassName();\r
+ if (oleName.equals("Worksheet")) {\r
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();\r
+ HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(dn, fs, false);\r
+ //System.out.println(oleName + ": " + embeddedWorkbook.getNumberOfSheets());\r
+ } else if (oleName.equals("Document")) {\r
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();\r
+ HWPFDocument embeddedWordDocument = new HWPFDocument(dn, fs);\r
+ //System.out.println(oleName + ": " + embeddedWordDocument.getRange().text());\r
+ } else if (oleName.equals("Presentation")) {\r
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();\r
+ SlideShow embeddedPowerPointDocument = new SlideShow(new HSLFSlideShow(dn, fs));\r
+ //System.out.println(oleName + ": " + embeddedPowerPointDocument.getSlides().length);\r
+ } else {\r
+ if(obj.hasDirectoryEntry()){\r
+ // The DirectoryEntry is a DirectoryNode. Examine its entries to find out what it is\r
+ DirectoryNode dn = (DirectoryNode) obj.getDirectory();\r
+ for (Iterator entries = dn.getEntries(); entries.hasNext();) {\r
+ Entry entry = (Entry) entries.next();\r
+ //System.out.println(oleName + "." + entry.getName());\r
+ }\r
+ } else {\r
+ // There is no DirectoryEntry\r
+ // Recover the object's data from the HSSFObjectData instance.\r
+ byte[] objectData = obj.getObjectData();\r
+ }\r
+ }\r
+ }\r
+ }\r
+}\r
--- /dev/null
+/* ====================================================================\r
+ Licensed to the Apache Software Foundation (ASF) under one or more\r
+ contributor license agreements. See the NOTICE file distributed with\r
+ this work for additional information regarding copyright ownership.\r
+ The ASF licenses this file to You under the Apache License, Version 2.0\r
+ (the "License"); you may not use this file except in compliance with\r
+ the License. You may obtain a copy of the License at\r
+\r
+ http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+ Unless required by applicable law or agreed to in writing, software\r
+ distributed under the License is distributed on an "AS IS" BASIS,\r
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+ See the License for the specific language governing permissions and\r
+ limitations under the License.\r
+==================================================================== */\r
+package org.apache.poi.xssf.usermodel.examples;\r
+\r
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;\r
+import org.apache.poi.openxml4j.opc.OPCPackage;\r
+import org.apache.poi.openxml4j.opc.PackagePart;\r
+import org.apache.poi.xwpf.usermodel.XWPFDocument;\r
+import org.apache.poi.hslf.HSLFSlideShow;\r
+import org.apache.poi.hwpf.HWPFDocument;\r
+import org.apache.poi.xslf.XSLFSlideShow;\r
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;\r
+\r
+import java.io.InputStream;\r
+\r
+/**\r
+ * Demonstrates how you can extract embedded data from a .xlsx file.\r
+ * <p>\r
+ * The workbook named by the first command-line argument is opened and each\r
+ * package part returned by {@link XSSFWorkbook#getAllEmbedds()} is dispatched on\r
+ * its content type to the matching HSSF/XSSF, HWPF/XWPF or HSLF/XSLF class;\r
+ * any other content type is printed and the part's raw input stream obtained.\r
+ */\r
+public class EmbeddedObjects {\r
+ public static void main(String[] args) throws Exception {\r
+ XSSFWorkbook workbook = new XSSFWorkbook(args[0]);\r
+ for (PackagePart pPart : workbook.getAllEmbedds()) {\r
+ String contentType = pPart.getContentType();\r
+ // Excel Workbook - binary (OLE2) file format\r
+ if (contentType.equals("application/vnd.ms-excel")) {\r
+ HSSFWorkbook embeddedWorkbook = new HSSFWorkbook(pPart.getInputStream());\r
+ }\r
+ // Excel Workbook - OpenXML file format\r
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")) {\r
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());\r
+ XSSFWorkbook embeddedWorkbook = new XSSFWorkbook(docPackage);\r
+ }\r
+ // Word Document - binary (OLE2CDF) file format\r
+ else if (contentType.equals("application/msword")) {\r
+ HWPFDocument document = new HWPFDocument(pPart.getInputStream());\r
+ }\r
+ // Word Document - OpenXML file format\r
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.wordprocessingml.document")) {\r
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());\r
+ XWPFDocument document = new XWPFDocument(docPackage);\r
+ }\r
+ // PowerPoint Document - binary file format\r
+ else if (contentType.equals("application/vnd.ms-powerpoint")) {\r
+ HSLFSlideShow slideShow = new HSLFSlideShow(pPart.getInputStream());\r
+ }\r
+ // PowerPoint Document - OpenXML file format\r
+ else if (contentType.equals("application/vnd.openxmlformats-officedocument.presentationml.presentation")) {\r
+ OPCPackage docPackage = OPCPackage.open(pPart.getInputStream());\r
+ XSLFSlideShow slideShow = new XSLFSlideShow(docPackage);\r
+ }\r
+ // Any other type of embedded object: report it and fall back to the raw stream.\r
+ else {\r
+ System.out.println("Unknown Embedded Document: " + contentType);\r
+ InputStream inputStream = pPart.getInputStream();\r
+ }\r
+ }\r
+ }\r
+}
\ No newline at end of file
extends HSSFShape
implements HSSFShapeContainer
{
- List shapes = new ArrayList();
+ List<HSSFShape> shapes = new ArrayList<HSSFShape>();
int x1 = 0;
int y1 = 0 ;
int x2 = 1023;
/**
* Return all children contained by this shape.
*/
- public List getChildren()
+ public List<HSSFShape> getChildren()
{
return shapes;
}
*
* @return the list of pictures (a list of {@link HSSFPictureData} objects.)
*/
- public List getAllPictures()
+ public List<HSSFPictureData> getAllPictures()
{
// The drawing group record always exists at the top level, so we won't need to do this recursively.
- List pictures = new ArrayList();
+ List<HSSFPictureData> pictures = new ArrayList<HSSFPictureData>();
Iterator recordIter = workbook.getRecords().iterator();
while (recordIter.hasNext())
{
* @param escherRecords the escher records.
* @param pictures the list to populate with the pictures.
*/
- private void searchForPictures(List escherRecords, List pictures)
+ private void searchForPictures(List escherRecords, List<HSSFPictureData> pictures)
{
Iterator recordIter = escherRecords.iterator();
while (recordIter.hasNext())
*
* @return the list of embedded objects (a list of {@link HSSFObjectData} objects.)
*/
- public List getAllEmbeddedObjects()
+ public List<HSSFObjectData> getAllEmbeddedObjects()
{
- List objects = new ArrayList();
+ List<HSSFObjectData> objects = new ArrayList<HSSFObjectData>();
for (int i = 0; i < getNumberOfSheets(); i++)
{
getAllEmbeddedObjects(getSheetAt(i).getSheet().getRecords(), objects);
* @param records the list of records to search.
* @param objects the list of embedded objects to populate.
*/
- private void getAllEmbeddedObjects(List records, List objects)
+ private void getAllEmbeddedObjects(List records, List<HSSFObjectData> objects)
{
Iterator recordIter = records.iterator();
while (recordIter.hasNext())