<uptodate property="main.test.notRequired" targetfile="${main.testokfile}">
<srcfiles dir="${main.src}"/>
<srcfiles dir="${main.src.test}"/>
+ <srcfiles dir="${ooxml.src}"/>
</uptodate>
</target>
<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-alpha1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="add">45018 - Support for fetching embedded documents from within an OOXML file</action>
<action dev="POI-DEVELOPERS" type="add">Port support for setting a policy on missing / blank cells when fetching, to XSSF too</action>
<action dev="POI-DEVELOPERS" type="add">Common text extraction factory, which returns the correct POITextExtractor for the supplied data</action>
<action dev="POI-DEVELOPERS" type="add">Text Extraction support for the new OOXML files (.xlsx, .docx and .pptx)</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-alpha1" date="2008-04-??">
+ <action dev="POI-DEVELOPERS" type="add">45018 - Support for fetching embedded documents from within an OOXML file</action>
<action dev="POI-DEVELOPERS" type="add">Port support for setting a policy on missing / blank cells when fetching, to XSSF too</action>
<action dev="POI-DEVELOPERS" type="add">Common text extraction factory, which returns the correct POITextExtractor for the supplied data</action>
<action dev="POI-DEVELOPERS" type="add">Text Extraction support for the new OOXML files (.xlsx, .docx and .pptx)</action>
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
+import java.util.LinkedList;
+import java.util.List;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties";
+ public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
+
/** The OPC Package */
private Package pkg;
*/
private POIXMLProperties properties;
+ /**
+ * The embedded OLE2 files in the OPC package
+ */
+ private List<PackagePart> embedds;
protected POIXMLDocument() {}
// Get core part
this.corePart = this.pkg.getPart(coreDocRelationship);
+
+ // Get any embedded OLE2 documents
+ this.embedds = new LinkedList<PackagePart>();
+ for(PackageRelationship rel : corePart.getRelationshipsByType(OLE_OBJECT_REL_TYPE)) {
+ embedds.add(getTargetPart(rel));
+ }
} catch (OpenXML4JException e) {
throw new IOException(e.toString());
}
}
return properties;
}
+
+ /**
+ * Get the document's embedded files.
+ *
+ * The list is populated elsewhere in this class from the core part's
+ * relationships of type {@link #OLE_OBJECT_REL_TYPE}.
+ *
+ * The <code>embedds</code> field itself is returned, not a copy, so
+ * changes the caller makes to the returned list are visible to this
+ * document.
+ *
+ * NOTE(review): nothing in this body throws OpenXML4JException; the
+ * clause is part of the signature that callers compile against.
+ * NOTE(review): the no-argument constructor does not appear to
+ * initialise the list, so this may return null in that case - confirm.
+ *
+ * @return the list of embedded package parts held by this document
+ */
+ public List<PackagePart> getAllEmbedds() throws OpenXML4JException
+ {
+ return embedds;
+ }
}
public ExtendedProperties getExtendedProperties() throws IOException, OpenXML4JException, XmlException {
return document.getProperties().getExtendedProperties();
}
+
+ /**
+ * Returns the opened document.
+ *
+ * @return this object's <code>document</code> field, exactly as held
+ */
+ public POIXMLDocument getDocument(){
+ return document;
+ }
}
public static void main(String[] args) throws Exception {
if(args.length < 1) {
System.err.println("Use:");
- System.err.println(" HXFWordExtractor <filename.xlsx>");
+ System.err.println(" HXFWordExtractor <filename.docx>");
System.exit(1);
}
POIXMLTextExtractor extractor =