<property name="main.output.dir" location="build/classes"/>
<property name="main.output.test.dir" location="build/test-classes"/>
<property name="main.lib" location="lib"/>
- <property name="ooxml.lib" location="ooxml-lib"/>
<property name="main.reports.test" location="build/test-results"/>
<property name="main.jar1.dir" location="${main.lib}/commons-logging-1.1.jar"/>
<property name="main.jar1.url" value="${repository}/commons-logging/jars/commons-logging-1.1.jar"/>
<property name="examples.jar3.url" value="${repository}/commons-lang/jars/commons-lang-2.1.jar"/>
<property name="examples.testokfile" location="build/examples-testokfile.txt"/>
- <!-- Experimental OOXML support: -->
- <property name="ooxml.src" location="src/scratchpad/ooxml-src"/>
- <property name="ooxml.src.test" location="src/scratchpad/ooxml-testcases"/>
- <property name="ooxml.reports.test" location="build/ooxml-test-results"/>
- <property name="ooxml.output.dir" location="build/ooxml-classes"/>
- <property name="ooxml.output.test.dir" location="build/ooxml-test-classes"/>
- <property name="ooxml.testokfile" location="build/ooxml-testokfile.txt"/>
-
- <property name="ooxml.jar1.dir" location="${ooxml.lib}/dom4j-1.6.1.jar"/>
- <property name="ooxml.jar1.url" value="${repository}/dom4j/jars/dom4j-1.6.1.jar"/>
- <property name="ooxml.jar2.dir" location="${ooxml.lib}/jaxen-1.1.jar"/>
- <property name="ooxml.jar2.url" value="${repository}/jaxen/jars/jaxen-1.1.jar"/>
- <property name="ooxml.jar3.dir" location="${ooxml.lib}/xmlbeans-2.3.0.jar"/>
- <property name="ooxml.jar3.url" value="${repository}/org.apache.xmlbeans/jars/xmlbeans-2.3.0.jar"/>
- <property name="ooxml.jar4.dir" location="${ooxml.lib}/jsr173_1.0_api.jar"/>
- <property name="ooxml.jar4.url" value="${repository}/xmlbeans/jars/jsr173_1.0_api.jar"/>
- <!-- No official release of openxml4j yet -->
- <property name="ooxml.jar5.dir" location="${ooxml.lib}/openxml4j-bin-alpha-080124.jar"/>
- <property name="ooxml.jar5.url" value="http://people.apache.org/~nick/openxml4j-bin-prealpha-071224.jar"/>
-
- <!-- See http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
- <!-- "Copy these file(s), free of charge" -->
- <property name="ooxml.xsds.ozip" location="${ooxml.lib}/OfficeOpenXML-Part4.zip"/>
- <property name="ooxml.xsds.izip" location="${ooxml.lib}/OfficeOpenXML-XMLSchema.zip"/>
- <property name="ooxml.xsds.url" value="http://www.ecma-international.org/publications/files/ECMA-ST/Office%20Open%20XML%20Part%204%20(DOCX).zip" />
- <property name="ooxml.xsds.jar" location="${ooxml.lib}/ooxml-schemas.jar"/>
-
<property name="build.site" location="build/tmp/site/build/site"/>
<property name="build.site.src" location="build/tmp/site"/>
<property name="junit.report.dir" location="${build.site}/junit"/>
<pathelement location="${contrib.output.test.dir}"/>
</path>
- <path id="ooxml.classpath">
- <path refid="main.classpath"/>
- <path refid="scratchpad.classpath"/>
- <fileset dir="${ooxml.lib}">
- <include name="*.jar" />
- </fileset>
- </path>
-
-
<path id="examples.classpath">
<path refid="main.classpath"/>
<pathelement location="${main.output.dir}"/>
<mkdir dir="${scratchpad.output.dir}"/>
<mkdir dir="${contrib.output.dir}"/>
<mkdir dir="${examples.output.dir}"/>
- <mkdir dir="${ooxml.output.dir}"/>
<mkdir dir="${main.output.test.dir}"/>
<mkdir dir="${contrib.output.test.dir}"/>
<mkdir dir="${scratchpad.output.test.dir}"/>
- <mkdir dir="${ooxml.output.test.dir}"/>
<mkdir dir="${main.reports.test}"/>
<mkdir dir="${scratchpad.reports.test}"/>
<mkdir dir="${contrib.reports.test}"/>
- <mkdir dir="${ooxml.reports.test}"/>
<mkdir dir="${junit.report.dir}"/>
<mkdir dir="${jdepend.report.dir}"/>
<mkdir dir="${jdepend.report.out.dir}"/>
<available file="${contrib.jar2.dir}"/>
<available file="${contrib.jar3.dir}"/>
<available file="${junit.jar1.dir}"/>
- <available file="${ooxml.jar1.dir}"/>
- <available file="${ooxml.jar2.dir}"/>
- <available file="${ooxml.jar3.dir}"/>
- <available file="${ooxml.jar4.dir}"/>
- <available file="${ooxml.jar5.dir}"/>
</and>
<isset property="disconnected"/>
</or>
<get src="${contrib.jar2.url}" dest="${contrib.jar2.dir}"/>
<get src="${contrib.jar3.url}" dest="${contrib.jar3.dir}"/>
<get src="${junit.jar1.url}" dest="${junit.jar1.dir}"/>
-
- <get src="${ooxml.jar1.url}" dest="${ooxml.jar1.dir}"/>
- <get src="${ooxml.jar2.url}" dest="${ooxml.jar2.dir}"/>
- <get src="${ooxml.jar3.url}" dest="${ooxml.jar3.dir}"/>
- <get src="${ooxml.jar4.url}" dest="${ooxml.jar4.dir}"/>
- <get src="${ooxml.jar5.url}" dest="${ooxml.jar5.dir}"/>
</target>
- <target name="check-ooxml-xsds">
- <condition property="ooxml-xsds.present">
- <or>
- <and>
- <available file="${ooxml.xsds.izip}"/>
- </and>
- <isset property="disconnected"/>
- </or>
- </condition>
- </target>
- <target name="fetch-ooxml-xsds" unless="ooxml-xsds.present"
- description="Fetches needed OOXML xsd files from the Internet">
- <get src="${ooxml.xsds.url}" dest="${ooxml.xsds.ozip}"/>
- <unzip src="${ooxml.xsds.ozip}" dest="${ooxml.lib}">
- <patternset>
- <include name="OfficeOpenXML-XMLSchema.zip" />
- </patternset>
- </unzip>
- </target>
- <target name="check-compiled-ooxml-xsds">
- <condition property="ooxml-compiled-xsds.present">
- <or>
- <and>
- <available file="${ooxml.xsds.jar}"/>
- </and>
- <isset property="disconnected"/>
- </or>
- </condition>
- </target>
- <target name="compile-ooxml-xsds" unless="ooxml-compiled-xsds.present"
- depends="check-jars,fetch-jars,check-ooxml-xsds,fetch-ooxml-xsds,check-compiled-ooxml-xsds"
- description="Unpacks the OOXML xsd files, and compiles them into XmlBeans">
- <taskdef name="xmlbean"
- classname="org.apache.xmlbeans.impl.tool.XMLBean"
- classpath="${ooxml.jar3.dir}:${ooxml.jar4.dir}" />
-
- <unzip src="${ooxml.xsds.izip}" dest="build/ooxml-xsds/" />
- <!--
- schema="build/ooxml-xsds/"
- schema="build/ooxml-xsds/sml-workbook.xsd"
- -->
- <xmlbean
- schema="build/ooxml-xsds/"
- destfile="${ooxml.xsds.jar}"
- javasource="1.4"
- failonerror="false"
- fork="true"
- memoryMaximumSize="512m"
- >
- <classpath refid="ooxml.classpath"/>
- </xmlbean>
- </target>
-
<target name="compile" depends="init, compile-main, compile-scratchpad,
compile-contrib, compile-examples"
description="Compiles the POI main classes, scratchpad, contrib, and examples"/>
</javac>
</target>
- <target name="compile-ooxml" depends="init, check-ooxml-xsds, fetch-ooxml-xsds, compile-ooxml-xsds, compile-main">
- <!-- openxml4j requires java 1.5, so so must we, for now -->
- <javac target="1.5" source="1.5"
- destdir="${ooxml.output.dir}" debug="on" srcdir="${ooxml.src}">
- <classpath refid="ooxml.classpath"/>
- </javac>
-
- <javac target="1.5" source="1.5"
- failonerror="true" destdir="${ooxml.output.test.dir}" debug="on"
- fork="yes" srcdir="${ooxml.src.test}">
- <classpath>
- <path refid="ooxml.classpath"/>
- <pathelement location="${ooxml.output.dir}"/>
- <pathelement location="${junit.jar1.dir}"/>
- </classpath>
- </javac>
- </target>
-
<target name="test" depends="test-main,test-scratchpad,test-contrib"
description="Tests main, contrib and scratchpad"/>
<echo file="${contrib.testokfile}" append="false" message="testok"/>
</target>
- <target name="-test-ooxml-check">
- <uptodate property="ooxml.test.notRequired" targetfile="${ooxml.testokfile}">
- <srcfiles dir="${ooxml.src}"/>
- <srcfiles dir="${ooxml.src.test}"/>
- </uptodate>
- </target>
-
- <target name="test-ooxml" depends="compile-main,compile-ooxml,-test-ooxml-check" unless="ooxml.test.notRequired">
- <junit printsummary="yes" fork="no" haltonfailure="${halt.on.test.failure}" failureproperty="ooxml.test.failed">
- <classpath>
- <path refid="ooxml.classpath"/>
- <pathelement location="${main.output.dir}"/>
- <pathelement location="${ooxml.output.dir}"/>
- <pathelement location="${ooxml.output.test.dir}"/>
- <pathelement location="${junit.jar1.dir}"/>
- </classpath>
- <sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
- <sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
- <sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
- <sysproperty key="java.awt.headless" value="true"/>
- <formatter type="plain"/>
- <formatter type="xml"/>
- <batchtest todir="${ooxml.reports.test}">
- <fileset dir="${ooxml.src.test}">
- <include name="**/Test*.java"/>
- <exclude name="**/AllTests.java"/>
- </fileset>
- </batchtest>
- </junit>
- <delete file="${ooxml.testokfile}"/>
- <antcall target="-test-ooxml-write-testfile"/>
- </target>
-
- <target name="-test-ooxml-write-testfile" unless="ooxml.test.failed">
- <echo file="${ooxml.testokfile}" append="false" message="testok"/>
- </target>
-
<target name="-check-docs">
<uptodate property="main.docs.notRequired" targetfile="${build.site}/index.html">
<srcfiles dir="${build.site.src}"/>
</manifest>
</jar>
</target>
- <target name="jar-ooxml" depends="compile-ooxml" description="Creates the ooxml jar files for distribution">
- <jar destfile="${dist.dir}/${jar.name}-ooxml-${version.id}-${DSTAMP}.jar">
- <fileset dir="${ooxml.output.dir}" />
- <fileset dir="legal/" />
- <manifest>
- <attribute name="Built-By" value="${user.name}"/>
- <attribute name="Specification-Title" value="Apache POI"/>
- <attribute name="Specification-Version" value="${version.id}-${DSTAMP}"/>
- <attribute name="Specification-Vendor" value="Apache"/>
- <attribute name="Implementation-Title" value="Apache POI"/>
- <attribute name="Implementation-Version" value="${version.id}-${DSTAMP}"/>
- <attribute name="Implementation-Vendor" value="Apache"/>
- </manifest>
- </jar>
- </target>
<target name="dist" depends="clean, fail-unless-tools-are-available, compile, site, jar"
description="Creates the entire distribution into build/dist, from scratch">
<zip destfile="${dist.dir}/${jar.name}-bin-${version.id}-${DSTAMP}.zip">
<zipfileset dir="legal/" prefix="${zipdir}" />
+ <zipfileset dir="lib/" prefix="${zipdir}/lib" />
<zipfileset dir="${build.site}" prefix="${zipdir}/docs"/>
<zipfileset file="${dist.dir}/${jar.name}-${version.id}-${DSTAMP}.jar" prefix="${zipdir}" />
<zipfileset file="${dist.dir}/${jar.name}-contrib-${version.id}-${DSTAMP}.jar" prefix="${zipdir}" />
<tar destfile="${dist.dir}/${jar.name}-bin-${version.id}-${DSTAMP}.tar.gz"
compression="gzip">
<tarfileset dir="legal/" prefix="${zipdir}" />
+ <tarfileset dir="lib/" prefix="${zipdir}/lib" />
<tarfileset dir="${build.site}" prefix="${zipdir}/docs"/>
<tarfileset file="${dist.dir}/${jar.name}-${version.id}-${DSTAMP}.jar" prefix="${zipdir}" />
<tarfileset file="${dist.dir}/${jar.name}-contrib-${version.id}-${DSTAMP}.jar" prefix="${zipdir}" />
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-import org.apache.poi.hxf.HXFDocument;
-
-/**
- * Parent class of all UserModel POI XML (ooxml)
- * implementations.
- * Provides a similar function to {@link POIDocument},
- * for the XML based classes.
- */
-public abstract class POIXMLDocument {
- private HXFDocument document;
-
- /**
- * Creates a new POI XML Document, wrapping up
- * the underlying raw HXFDocument
- */
- protected POIXMLDocument(HXFDocument document) {
- this.document = document;
- }
-
- /**
- * Returns the underlying HXFDocument, typically
- * used for unit testing
- */
- public HXFDocument _getHXFDocument() {
- return document;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi;
-
-public abstract class POIXMLTextExtractor extends POITextExtractor {
- /** The POIXMLDocument that's open */
- protected POIXMLDocument document;
-
- /**
- * Creates a new text extractor for the given document
- */
- public POIXMLTextExtractor(POIXMLDocument document) {
- super(null);
-
- this.document = document;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.IOException;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackageRelationshipCollection;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMaster;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.NotesDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.PresentationDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldDocument;
-import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
-
-/**
- * Experimental class to do low level processing
- * of pptx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HSLFXML extends HXFDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
- public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
- public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
- public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
- public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
-
- private PresentationDocument presentationDoc;
-
- public HSLFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- presentationDoc =
- PresentationDocument.Factory.parse(basePart.getInputStream());
- }
-
- /**
- * Returns the low level presentation base object
- */
- public CTPresentation getPresentation() {
- return presentationDoc.getPresentation();
- }
-
- /**
- * Returns the references from the presentation to its
- * slides.
- * You'll need these to figure out the slide ordering,
- * and to get at the actual slides themselves
- */
- public CTSlideIdList getSlideReferences() {
- return getPresentation().getSldIdLst();
- }
- /**
- * Returns the references from the presentation to its
- * slide masters.
- * You'll need these to get at the actual slide
- * masters themselves
- */
- public CTSlideMasterIdList getSlideMasterReferences() {
- return getPresentation().getSldMasterIdLst();
- }
-
- /**
- * Returns the low level slide master object from
- * the supplied slide master reference
- */
- public CTSlideMaster getSlideMaster(CTSlideMasterIdListEntry master) throws IOException, XmlException {
- PackagePart masterPart =
- getRelatedPackagePart(master.getId2());
- SldMasterDocument masterDoc =
- SldMasterDocument.Factory.parse(masterPart.getInputStream());
- return masterDoc.getSldMaster();
- }
-
- /**
- * Returns the low level slide object from
- * the supplied slide reference
- */
- public CTSlide getSlide(CTSlideIdListEntry slide) throws IOException, XmlException {
- PackagePart slidePart =
- getRelatedPackagePart(slide.getId2());
- SldDocument slideDoc =
- SldDocument.Factory.parse(slidePart.getInputStream());
- return slideDoc.getSld();
- }
-
- /**
- * Returns the low level notes object for the given
- * slide, as found from the supplied slide reference
- */
- public CTNotesSlide getNotes(CTSlideIdListEntry slide) throws IOException, XmlException {
- PackagePart slidePart =
- getRelatedPackagePart(slide.getId2());
-
- PackageRelationshipCollection notes;
- try {
- notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
- } catch(InvalidFormatException e) {
- throw new IllegalStateException(e);
- }
-
- if(notes.size() == 0) {
- // No notes for this slide
- return null;
- }
- if(notes.size() > 1) {
- throw new IllegalStateException("Expecting 0 or 1 notes for a slide, but found " + notes.size());
- }
-
- PackagePart notesPart =
- getPackagePart(notes.getRelationship(0));
- NotesDocument notesDoc =
- NotesDocument.Factory.parse(notesPart.getInputStream());
-
- return notesDoc.getNotes();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hslf.HSLFXML;
-import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-
-public class HXFPowerPointExtractor extends POIXMLTextExtractor {
- private HSLFXMLSlideShow slideshow;
- private boolean slidesByDefault = true;
- private boolean notesByDefault = false;
-
- public HXFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HSLFXMLSlideShow(
- new HSLFXML(container)
- ));
- }
- public HXFPowerPointExtractor(HSLFXMLSlideShow slideshow) {
- super(slideshow);
- this.slideshow = slideshow;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFPowerPointExtractor <filename.pptx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFPowerPointExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- /**
- * Should a call to getText() return slide text?
- * Default is yes
- */
- public void setSlidesByDefault(boolean slidesByDefault) {
- this.slidesByDefault = slidesByDefault;
- }
- /**
- * Should a call to getText() return notes text?
- * Default is no
- */
- public void setNotesByDefault(boolean notesByDefault) {
- this.notesByDefault = notesByDefault;
- }
-
- /**
- * Gets the slide text, but not the notes text
- */
- public String getText() {
- return getText(slidesByDefault, notesByDefault);
- }
-
- /**
- * Gets the requested text from the file
- * @param slideText Should we retrieve text from slides?
- * @param notesText Should we retrieve text from notes?
- */
- public String getText(boolean slideText, boolean notesText) {
- StringBuffer text = new StringBuffer();
-
- CTSlideIdListEntry[] slideRefs =
- slideshow._getHSLFXML().getSlideReferences().getSldIdArray();
- for (int i = 0; i < slideRefs.length; i++) {
- try {
- CTSlide slide =
- slideshow._getHSLFXML().getSlide(slideRefs[i]);
- CTNotesSlide notes =
- slideshow._getHSLFXML().getNotes(slideRefs[i]);
-
- if(slideText) {
- extractText(slide.getCSld().getSpTree(), text);
- }
- if(notesText && notes != null) {
- extractText(notes.getCSld().getSpTree(), text);
- }
- } catch(Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- return text.toString();
- }
-
- private void extractText(CTGroupShape gs, StringBuffer text) {
- CTShape[] shapes = gs.getSpArray();
- for (int i = 0; i < shapes.length; i++) {
- CTTextBody textBody =
- shapes[i].getTxBody();
- if(textBody != null) {
- CTTextParagraph[] paras =
- textBody.getPArray();
- for (int j = 0; j < paras.length; j++) {
- CTRegularTextRun[] textRuns =
- paras[j].getRArray();
- for (int k = 0; k < textRuns.length; k++) {
- text.append( textRuns[k].getT() );
- }
- // End each paragraph with a new line
- text.append("\n");
- }
- }
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hslf.HSLFXML;
-
-/**
- * High level representation of a ooxml slideshow.
- * This is the first object most users will construct whether
- * they are reading or writing a slideshow. It is also the
- * top level object for creating new slides/etc.
- */
-public class HSLFXMLSlideShow extends POIXMLDocument {
- private org.apache.poi.hslf.HSLFXML hslfXML;
-
- public HSLFXMLSlideShow(HSLFXML xml) {
- super(xml);
- this.hslfXML = xml;
- }
-
- public HSLFXML _getHSLFXML() {
- return hslfXML;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf;
-
-import java.io.IOException;
-
-import org.apache.poi.hssf.model.SharedStringsTable;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheets;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;
-
-/**
- * Experimental class to do low level processing
- * of xlsx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HSSFXML extends HXFDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
- public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
- public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
- public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
-
- private WorkbookDocument workbookDoc;
- private SharedStringsTable sharedStrings;
-
- public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- workbookDoc =
- WorkbookDocument.Factory.parse(basePart.getInputStream());
-
- PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart);
- if (ssPart != null) {
- sharedStrings = new SharedStringsTable(ssPart);
- } else {
-
- }
- }
-
- /**
- * Returns the low level workbook base object
- */
- public CTWorkbook getWorkbook() {
- return workbookDoc.getWorkbook();
- }
- /**
- * Returns the references from the workbook to its
- * sheets.
- * You'll need these to figure out the sheet ordering,
- * and to get at the actual sheets themselves
- */
- public CTSheets getSheetReferences() {
- return getWorkbook().getSheets();
- }
- /**
- * Returns the low level (work)sheet object from
- * the supplied sheet reference
- */
- public CTWorksheet getSheet(CTSheet sheet) throws IOException, XmlException {
- PackagePart sheetPart =
- getRelatedPackagePart(sheet.getId());
- WorksheetDocument sheetDoc =
- WorksheetDocument.Factory.parse(sheetPart.getInputStream());
- return sheetDoc.getWorksheet();
- }
-
- /**
- * Returns the shared string at the given index
- */
- public String getSharedString(int index) {
- return this.sharedStrings.get(index);
- }
- protected SharedStringsTable _getSharedStringsTable() {
- return sharedStrings;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hssf.HSSFXML;
-import org.apache.poi.hssf.usermodel.HSSFXMLCell;
-import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
-
-/**
- * Helper class to extract text from an OOXML Excel file
- */
-public class HXFExcelExtractor extends POIXMLTextExtractor {
- private HSSFXMLWorkbook workbook;
- private boolean includeSheetNames = true;
- private boolean formulasNotResults = false;
-
- public HXFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HSSFXMLWorkbook(
- new HSSFXML(container)
- ));
- }
- public HXFExcelExtractor(HSSFXMLWorkbook workbook) {
- super(workbook);
- this.workbook = workbook;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFExcelExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFExcelExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- /**
- * Should sheet names be included? Default is true
- */
- public void setIncludeSheetNames(boolean includeSheetNames) {
- this.includeSheetNames = includeSheetNames;
- }
- /**
- * Should we return the formula itself, and not
- * the result it produces? Default is false
- */
- public void setFormulasNotResults(boolean formulasNotResults) {
- this.formulasNotResults = formulasNotResults;
- }
-
- /**
- * Retreives the text contents of the file
- */
- public String getText() {
- StringBuffer text = new StringBuffer();
-
- CTSheet[] sheetRefs =
- workbook._getHSSFXML().getSheetReferences().getSheetArray();
- for(int i=0; i<sheetRefs.length; i++) {
- try {
- CTWorksheet sheet =
- workbook._getHSSFXML().getSheet(sheetRefs[i]);
- CTRow[] rows =
- sheet.getSheetData().getRowArray();
-
- if(i > 0) {
- text.append("\n");
- }
- if(includeSheetNames) {
- text.append(sheetRefs[i].getName() + "\n");
- }
-
- for(int j=0; j<rows.length; j++) {
- CTCell[] cells = rows[j].getCArray();
- for(int k=0; k<cells.length; k++) {
- CTCell cell = cells[k];
- if(k > 0) {
- text.append("\t");
- }
-
- boolean done = false;
-
- // Is it a formula one?
- if(cell.getF() != null) {
- if(formulasNotResults) {
- text.append(cell.getF().getStringValue());
- done = true;
- }
- }
- if(!done) {
- HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
- text.append(uCell.getStringValue());
- }
- }
- text.append("\n");
- }
- } catch(Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- return text.toString();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hssf.model;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.LinkedList;
-
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
-
-
-public class SharedStringsTable extends LinkedList<String> {
- public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
-
- private SstDocument doc;
- private PackagePart part;
-
- public SharedStringsTable(PackagePart part) throws IOException, XmlException {
- this.part = part;
- doc = SstDocument.Factory.parse(
- part.getInputStream()
- );
- read();
- }
-
- private void read() {
- CTRst[] sts = doc.getSst().getSiArray();
- for (int i = 0; i < sts.length; i++) {
- add(sts[i].getT());
- }
- }
-
- /**
- * Writes the current shared strings table into
- * the associated OOXML PackagePart
- */
- public void write() throws IOException {
- CTSst sst = doc.getSst();
-
- // Remove the old list
- for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
- sst.removeSi(i);
- }
-
- // Add the new one
- for(String s : this) {
- sst.addNewSi().setT(s);
- }
-
- // Update the counts
- sst.setCount(this.size());
- sst.setUniqueCount(this.size());
-
- // Write out
- OutputStream out = part.getOutputStream();
- doc.save(out);
- out.close();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.usermodel;
-
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
-
-/**
- * User facing wrapper around an underlying cell object
- */
-public class HSSFXMLCell {
- private CTCell cell;
-
- /** The workbook to which this cell belongs */
- private final HSSFXMLWorkbook workbook;
-
- public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) {
- this.cell = rawCell;
- this.workbook = workbook;
- }
-
- /**
- * Formats the cell's contents, based on its type,
- * and returns it as a string.
- */
- public String getStringValue() {
-
- switch (cell.getT().intValue()) {
- case STCellType.INT_S:
- return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
- case STCellType.INT_INLINE_STR:
- return cell.getV();
- case STCellType.INT_N:
- return cell.getV();
- // TODO: support other types
- default:
- return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'";
- }
- }
-
- public String toString() {
- return cell.getR() + " - " + getStringValue();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hssf.HSSFXML;
-
-/**
- * High level representation of a ooxml workbook.
- * This is the first object most users will construct whether
- * they are reading or writing a workbook. It is also the
- * top level object for creating new sheets/etc.
- */
-public class HSSFXMLWorkbook extends POIXMLDocument {
- private HSSFXML hssfXML;
-
- public HSSFXMLWorkbook(HSSFXML xml) {
- super(xml);
- this.hssfXML = xml;
- }
-
- public HSSFXML _getHSSFXML() {
- return hssfXML;
- }
-
- public String getSharedString(int index) {
- return hssfXML.getSharedString(index);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf;
-
-import java.io.IOException;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
-
-/**
- * Experimental class to do low level processing
- * of docx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HWPFXML extends HXFDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
- public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
- public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
- public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
- public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
-
- private DocumentDocument wordDoc;
-
- public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- wordDoc =
- DocumentDocument.Factory.parse(basePart.getInputStream());
- }
-
- /**
- * Returns the low level document base object
- */
- public CTDocument1 getDocument() {
- return wordDoc.getDocument();
- }
-
- /**
- * Returns the low level body of the document
- */
- public CTBody getDocumentBody() {
- return getDocument().getBody();
- }
-
- /**
- * Returns the styles object used
- */
- public CTStyles getStyle() throws XmlException, IOException {
- PackagePart[] parts;
- try {
- parts = getRelatedByType(STYLES_RELATION_TYPE);
- } catch(InvalidFormatException e) {
- throw new IllegalStateException(e);
- }
- if(parts.length != 1) {
- throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
- }
-
- StylesDocument sd =
- StylesDocument.Factory.parse(parts[0].getInputStream());
- return sd.getStyles();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hwpf.HWPFXML;
-import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-
-/**
- * Helper class to extract text from an OOXML Word file
- */
-public class HXFWordExtractor extends POIXMLTextExtractor {
- private HWPFXMLDocument document;
-
- public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HWPFXMLDocument(
- new HWPFXML(container)
- ));
- }
- public HXFWordExtractor(HWPFXMLDocument document) {
- super(document);
- this.document = document;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFWordExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFWordExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- public String getText() {
- CTBody body = document._getHWPFXML().getDocumentBody();
- StringBuffer text = new StringBuffer();
-
- // Loop over paragraphs
- CTP[] ps = body.getPArray();
- for (int i = 0; i < ps.length; i++) {
- // Loop over ranges
- CTR[] rs = ps[i].getRArray();
- for (int j = 0; j < rs.length; j++) {
- // Loop over text runs
- CTText[] texts = rs[j].getTArray();
- for (int k = 0; k < texts.length; k++) {
- text.append(
- texts[k].getStringValue()
- );
- }
- }
- // New line after each paragraph.
- text.append("\n");
- }
-
- return text.toString();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hwpf.HWPFXML;
-
-/**
- * High level representation of a ooxml text document.
- */
-public class HWPFXMLDocument extends POIXMLDocument {
- private HWPFXML hwpfXML;
-
- public HWPFXMLDocument(HWPFXML xml) {
- super(xml);
- this.hwpfXML = xml;
- }
-
- public HWPFXML _getHWPFXML() {
- return hwpfXML;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hxf;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.util.ArrayList;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.poifs.common.POIFSConstants;
-import org.apache.poi.poifs.storage.HeaderBlockConstants;
-import org.apache.poi.util.IOUtils;
-import org.apache.poi.util.LongField;
-import org.apache.xmlbeans.XmlException;
-import org.dom4j.Document;
-import org.dom4j.DocumentException;
-import org.dom4j.io.SAXReader;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackageAccess;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackagePartName;
-import org.openxml4j.opc.PackageRelationship;
-import org.openxml4j.opc.PackageRelationshipCollection;
-import org.openxml4j.opc.PackagingURIHelper;
-import org.openxml4j.opc.internal.PackagePropertiesPart;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
-
-/**
- * Parent class of the low level interface to
- * all POI XML (OOXML) implementations.
- * Normal users should probably deal with things that
- * extends {@link POIXMLDocument}, unless they really
- * do need to get low level access to the files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public abstract class HXFDocument {
- public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties";
- public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties";
-
- /**
- * File package/container.
- */
- protected Package container;
- /**
- * The Package Part for our base document
- */
- protected PackagePart basePart;
- /**
- * The base document of this instance, eg Workbook for
- * xslsx
- */
- protected Document baseDocument;
-
- protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException {
- this.container = container;
-
- // Find the base document
- basePart = getSinglePartByType(baseContentType);
-
- // And load it up
- try {
- SAXReader reader = new SAXReader();
- baseDocument = reader.read(basePart.getInputStream());
- } catch (DocumentException e) {
- throw new OpenXML4JException(e.getMessage());
- } catch (IOException ioe) {
- throw new OpenXML4JException(ioe.getMessage());
- }
- }
-
- /**
- * Checks that the supplied InputStream (which MUST
- * support mark and reset, or be a PushbackInputStream)
- * has a OOXML (zip) header at the start of it.
- * If your InputStream does not support mark / reset,
- * then wrap it in a PushBackInputStream, then be
- * sure to always use that, and not the original!
- * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
- */
- public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
- // We want to peek at the first 4 bytes
- inp.mark(4);
-
- byte[] header = new byte[4];
- IOUtils.readFully(inp, header);
-
- // Wind back those 4 bytes
- if(inp instanceof PushbackInputStream) {
- PushbackInputStream pin = (PushbackInputStream)inp;
- pin.unread(header);
- } else {
- inp.reset();
- }
-
- // Did it match the ooxml zip signature?
- return (
- header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
- header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
- header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
- header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
- );
- }
-
- /**
- * Fetches the (single) PackagePart with the supplied
- * content type.
- * @param contentType The content type to search for
- * @throws IllegalArgumentException If we don't find a single part of that type
- */
- private PackagePart getSinglePartByType(String contentType) throws IllegalArgumentException {
- ArrayList<PackagePart> parts =
- container.getPartsByContentType(contentType);
- if(parts.size() != 1) {
- throw new IllegalArgumentException("Expecting one entry with content type of " + contentType + ", but found " + parts.size());
- }
- return parts.get(0);
- }
-
- /**
- * Fetches the (single) PackagePart which is defined as
- * the supplied relation content type of the specified part,
- * or null if none found.
- * @param relationType The relation content type to search for
- * @throws IllegalArgumentException If we find more than one part of that type
- * TODO: this sucks! Make Package and PackagePart implement common intf that defines getRelationshipsByType & friends
- */
- protected PackagePart getSinglePartByRelationType(String relationType, PackagePart part) throws IllegalArgumentException, OpenXML4JException {
- PackageRelationshipCollection rels =
- part.getRelationshipsByType(relationType);
- if(rels.size() == 0) {
- return null;
- }
- if(rels.size() > 1) {
- throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
- }
- PackageRelationship rel = rels.getRelationship(0);
- return getPackagePart(rel);
- }
-
- /**
- * Fetches the (single) PackagePart which is defined as
- * the supplied relation content type of the base
- * container, or null if none found.
- * @param relationType The relation content type to search for
- * @throws IllegalArgumentException If we find more than one part of that type
- */
- protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
- PackageRelationshipCollection rels =
- container.getRelationshipsByType(relationType);
- if(rels.size() == 0) {
- return null;
- }
- if(rels.size() > 1) {
- throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
- }
- PackageRelationship rel = rels.getRelationship(0);
- return getPackagePart(rel);
- }
-
- /**
- * Retrieves the PackagePart for the given relation
- * id. This will normally come from a r:id attribute
- * on part of the base document.
- * @param partId The r:id pointing to the other PackagePart
- */
- protected PackagePart getRelatedPackagePart(String partId) {
- PackageRelationship rel =
- basePart.getRelationship(partId);
- return getPackagePart(rel);
- }
-
- /**
- * Retrieves the PackagePart for the given Relationship
- * object. Normally you'll want to go via a content type
- * or r:id to get one of those.
- */
- protected PackagePart getPackagePart(PackageRelationship rel) {
- PackagePartName relName;
- try {
- relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- } catch(InvalidFormatException e) {
- throw new InternalError(e.getMessage());
- }
-
- PackagePart part = container.getPart(relName);
- if(part == null) {
- throw new IllegalArgumentException("No part found for rel " + rel);
- }
- return part;
- }
-
- /**
- * Retrieves all the PackageParts which are defined as
- * relationships of the base document with the
- * specified content type.
- */
- protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
- PackageRelationshipCollection partsC =
- basePart.getRelationshipsByType(contentType);
-
- PackagePart[] parts = new PackagePart[partsC.size()];
- int count = 0;
- for (PackageRelationship rel : partsC) {
- parts[count] = getPackagePart(rel);
- count++;
- }
- return parts;
- }
-
- /**
- * Get the package container.
- * @return The package associated to this document.
- */
- public Package getPackage() {
- return container;
- }
-
- /**
- * Get the core document properties (core ooxml properties).
- */
- public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, XmlException, IOException {
- PackagePart propsPart = getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE);
- if(propsPart == null) {
- return null;
- }
- return (PackagePropertiesPart)propsPart;
- }
-
- /**
- * Get the extended document properties (extended ooxml properties)
- */
- public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException {
- PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE);
-
- PropertiesDocument props = PropertiesDocument.Factory.parse(
- propsPart.getInputStream());
- return props.getProperties();
- }
-
- /**
- * Returns an opened OOXML Package for the supplied File
- * @param f File to open
- */
- public static Package openPackage(File f) throws InvalidFormatException {
- return Package.open(f.toString(), PackageAccess.READ_WRITE);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hxf.dev;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PrintStream;
-import java.util.ArrayList;
-
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackageAccess;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackageRelationship;
-import org.openxml4j.opc.PackageRelationshipCollection;
-
-/**
- * Prints out the contents of a HXF (ooxml) container.
- * Useful for seeing what parts are defined, and how
- * they're all related to each other.
- */
-public class HXFLister {
- private Package container;
- private PrintStream disp;
-
- public HXFLister(Package container) {
- this(container, System.out);
- }
- public HXFLister(Package container, PrintStream disp) {
- this.container = container;
- this.disp = disp;
- }
-
- /**
- * Figures out how big a given PackagePart is.
- */
- public static long getSize(PackagePart part) throws IOException {
- InputStream in = part.getInputStream();
- byte[] b = new byte[8192];
- long size = 0;
- int read = 0;
-
- while(read > -1) {
- read = in.read(b);
- if(read > 0) {
- size += read;
- }
- }
-
- return size;
- }
-
- /**
- * Displays information on all the different
- * parts of the OOXML file container.
- */
- public void displayParts() throws Exception {
- ArrayList<PackagePart> parts = container.getParts();
- for (PackagePart part : parts) {
- disp.println(part.getPartName());
- disp.println("\t" + part.getContentType());
-
- if(! part.getPartName().toString().equals("/docProps/core.xml")) {
- disp.println("\t" + getSize(part) + " bytes");
- }
-
- if(! part.isRelationshipPart()) {
- disp.println("\t" + part.getRelationships().size() + " relations");
- for(PackageRelationship rel : part.getRelationships()) {
- displayRelation(rel, "\t ");
- }
- }
- }
- }
- /**
- * Displays information on all the different
- * relationships between different parts
- * of the OOXML file container.
- */
- public void displayRelations() throws Exception {
- PackageRelationshipCollection rels =
- container.getRelationships();
- for (PackageRelationship rel : rels) {
- displayRelation(rel, "");
- }
- }
- private void displayRelation(PackageRelationship rel, String indent) {
- disp.println(indent+"Relationship:");
- disp.println(indent+"\tFrom: "+ rel.getSourceURI());
- disp.println(indent+"\tTo: " + rel.getTargetURI());
- disp.println(indent+"\tID: " + rel.getId());
- disp.println(indent+"\tMode: " + rel.getTargetMode());
- disp.println(indent+"\tType: " + rel.getRelationshipType());
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length == 0) {
- System.err.println("Use:");
- System.err.println("\tjava HXFLister <filename>");
- System.exit(1);
- }
-
- File f = new File(args[0]);
- if(! f.exists()) {
- System.err.println("Error, file not found!");
- System.err.println("\t" + f.toString());
- System.exit(2);
- }
-
- HXFLister lister = new HXFLister(
- Package.open(f.toString(), PackageAccess.READ)
- );
-
- lister.disp.println(f.toString() + "\n");
- lister.displayParts();
- lister.disp.println();
- lister.displayRelations();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf;
-
-import java.io.File;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideMasterIdListEntry;
-
-import junit.framework.TestCase;
-
-public class TestHSLFXML extends TestCase {
- private File sampleFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HSLF.testdata.path") +
- File.separator + "sample.pptx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HSLFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
-
- HSLFXML xml;
-
- // With the finalised uri, should be fine
- xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Check the core
- assertNotNull(xml.getPresentation());
-
- // Check it has some slides
- assertTrue(
- xml.getSlideReferences().sizeOfSldIdArray() > 0
- );
- assertTrue(
- xml.getSlideMasterReferences().sizeOfSldMasterIdArray() > 0
- );
- }
-
- public void testSlideBasics() throws Exception {
- HSLFXML xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Should have 1 master
- assertEquals(1, xml.getSlideMasterReferences().sizeOfSldMasterIdArray());
- assertEquals(1, xml.getSlideMasterReferences().getSldMasterIdArray().length);
-
- // Should have three sheets
- assertEquals(2, xml.getSlideReferences().sizeOfSldIdArray());
- assertEquals(2, xml.getSlideReferences().getSldIdArray().length);
-
- // Check they're as expected
- CTSlideIdListEntry[] slides = xml.getSlideReferences().getSldIdArray();
- assertEquals(256, slides[0].getId());
- assertEquals(257, slides[1].getId());
- assertEquals("rId2", slides[0].getId2());
- assertEquals("rId3", slides[1].getId2());
-
- // Now get those objects
- assertNotNull(xml.getSlide(slides[0]));
- assertNotNull(xml.getSlide(slides[1]));
-
- // And check they have notes as expected
- assertNotNull(xml.getNotes(slides[0]));
- assertNotNull(xml.getNotes(slides[1]));
-
- // And again for the master
- CTSlideMasterIdListEntry[] masters =
- xml.getSlideMasterReferences().getSldMasterIdArray();
- assertEquals(2147483648l, masters[0].getId());
- assertEquals("rId1", masters[0].getId2());
- assertNotNull(xml.getSlideMaster(masters[0]));
- }
-
- public void testMetadataBasics() throws Exception {
- HSLFXML xml = new HSLFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office PowerPoint", xml.getExtendedProperties().getApplication());
- assertEquals(0, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-
-import org.apache.poi.hslf.HSLFXML;
-import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
-import org.apache.poi.hxf.HXFDocument;
-
-import junit.framework.TestCase;
-
-/**
- * Tests for HXFPowerPointExtractor
- */
-public class TestHXFPowerPointExtractor extends TestCase {
- /**
- * A simple file
- */
- private HSLFXML xmlA;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HSLF.testdata.path") +
- File.separator + "sample.pptx"
- );
-
- xmlA = new HSLFXML(HXFDocument.openPackage(fileA));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFPowerPointExtractor(xmlA.getPackage());
- new HXFPowerPointExtractor(new HSLFXMLSlideShow(xmlA));
-
- HXFPowerPointExtractor extractor =
- new HXFPowerPointExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check Basics
- assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
- assertTrue(text.endsWith("amet\n\n"));
-
- // Just slides, no notes
- text = extractor.getText(true, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n", text
- );
-
- // Just notes, no slides
- text = extractor.getText(false, true);
- assertEquals(
- "\n\n\n\n", text
- );
-
- // Both
- text = extractor.getText(true, true);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n\n\n", text
- );
-
- // Via set defaults
- extractor.setSlidesByDefault(false);
- extractor.setNotesByDefault(true);
- text = extractor.getText();
- assertEquals(
- "\n\n\n\n", text
- );
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf;
-
-import java.io.File;
-
-import org.apache.poi.hssf.model.SharedStringsTable;
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-
-import junit.framework.TestCase;
-
-public class TestHSSFXML extends TestCase {
- /**
- * Uses the old style schemas.microsoft.com schema uri
- */
- private File sampleFileBeta;
- /**
- * Uses the new style schemas.openxmlformats.org schema uri
- */
- private File sampleFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample.xlsx"
- );
- sampleFileBeta = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample-beta.xlsx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
- HXFDocument.openPackage(sampleFileBeta);
-
- HSSFXML xml;
-
- // With an old-style uri, as found in a file produced
- // with the office 2007 beta, will fail, as we don't
- // translate things
- try {
- xml = new HSSFXML(
- HXFDocument.openPackage(sampleFileBeta)
- );
- fail();
- } catch(Exception e) {}
-
- // With the finalised uri, should be fine
- xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Check it has a workbook
- assertNotNull(xml.getWorkbook());
- }
-
- public void testSheetBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Should have three sheets
- assertEquals(3, xml.getSheetReferences().sizeOfSheetArray());
- assertEquals(3, xml.getSheetReferences().getSheetArray().length);
-
- // Check they're as expected
- CTSheet[] sheets = xml.getSheetReferences().getSheetArray();
- assertEquals("Sheet1", sheets[0].getName());
- assertEquals("Sheet2", sheets[1].getName());
- assertEquals("Sheet3", sheets[2].getName());
- assertEquals("rId1", sheets[0].getId());
- assertEquals("rId2", sheets[1].getId());
- assertEquals("rId3", sheets[2].getId());
-
- // Now get those objects
- assertNotNull(xml.getSheet(sheets[0]));
- assertNotNull(xml.getSheet(sheets[1]));
- assertNotNull(xml.getSheet(sheets[2]));
- }
-
- public void testMetadataBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Excel", xml.getExtendedProperties().getApplication());
- assertEquals(0, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-
- public void testSharedStringBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml._getSharedStringsTable());
-
- SharedStringsTable sst = xml._getSharedStringsTable();
- assertEquals(10, sst.size());
-
- assertEquals("Lorem", sst.get(0));
- for(int i=0; i<sst.size(); i++) {
- assertEquals(sst.get(i), xml.getSharedString(i));
- }
-
- // Add a few more, then save and reload, checking
- // changes have been kept
- sst.add("Foo");
- sst.add("Bar");
- sst.set(0, "LoremLorem");
-
- sst.write();
-
- xml = new HSSFXML(xml.getPackage());
- sst = xml._getSharedStringsTable();
- assertEquals(12, sst.size());
-
- assertEquals("LoremLorem", sst.get(0));
- for(int i=0; i<sst.size(); i++) {
- assertEquals(sst.get(i), xml.getSharedString(i));
- }
- }
-}
\ No newline at end of file
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import junit.framework.TestCase;
-
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hssf.HSSFXML;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
-import org.apache.poi.hxf.HXFDocument;
-
-/**
- * Tests for HXFExcelExtractor
- */
-public class TestHXFExcelExtractor extends TestCase {
- /**
- * A very simple file
- */
- private HSSFXML xmlA;
- /**
- * A fairly complex file
- */
- private HSSFXML xmlB;
-
- /**
- * A fairly simple file - ooxml
- */
- private HSSFXML simpleXLSX;
- /**
- * A fairly simple file - ole2
- */
- private HSSFWorkbook simpleXLS;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample.xlsx"
- );
- File fileB = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "AverageTaxRates.xlsx"
- );
-
- File fileSOOXML = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "SampleSS.xlsx"
- );
- File fileSOLE2 = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "SampleSS.xls"
- );
-
- xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
- xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
-
- simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
- simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFExcelExtractor(xmlA.getPackage());
- new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
-
- HXFExcelExtractor extractor =
- new HXFExcelExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check sheet names
- assertTrue(text.startsWith("Sheet1"));
- assertTrue(text.endsWith("Sheet3\n"));
-
- // Now without, will have text
- extractor.setIncludeSheetNames(false);
- text = extractor.getText();
- assertEquals(
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\t4995\n" +
- "\n\n", text);
-
- // Now get formulas not their values
- extractor.setFormulasNotResults(true);
- text = extractor.getText();
- assertEquals(
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\tSUM(B1:B9)\n" +
- "\n\n", text);
-
- // With sheet names too
- extractor.setIncludeSheetNames(true);
- text = extractor.getText();
- assertEquals(
- "Sheet1\n" +
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\tSUM(B1:B9)\n\n" +
- "Sheet2\n\n" +
- "Sheet3\n"
- , text);
- }
-
- public void testGetComplexText() throws Exception {
- new HXFExcelExtractor(xmlB.getPackage());
- new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
-
- HXFExcelExtractor extractor =
- new HXFExcelExtractor(xmlB.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Might not have all formatting it should do!
- // TODO decide if we should really have the "null" in there
- assertTrue(text.startsWith(
- "Avgtxfull\n" +
- "null\t(iii) AVERAGE TAX RATES ON ANNUAL"
- ));
- }
-
- /**
- * Test that we return pretty much the same as
- * ExcelExtractor does, when we're both passed
- * the same file, just saved as xls and xlsx
- */
- public void testComparedToOLE2() throws Exception {
- HXFExcelExtractor ooxmlExtractor =
- new HXFExcelExtractor(simpleXLSX.getPackage());
- ExcelExtractor ole2Extractor =
- new ExcelExtractor(simpleXLS);
-
- POITextExtractor[] extractors =
- new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
- for (int i = 0; i < extractors.length; i++) {
- POITextExtractor extractor = extractors[i];
-
- String text = extractor.getText().replaceAll("[\r\t]", "");
- //System.out.println(text.length());
- //System.out.println(text);
- assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
- Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
- Matcher m = pattern.matcher(text);
- assertTrue(m.matches());
- }
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf;
-
-import java.io.File;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-
-import junit.framework.TestCase;
-
-public class TestHWPFXML extends TestCase {
- private File sampleFile;
- private File complexFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "sample.docx"
- );
- complexFile = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "IllustrativeCases.docx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
- HXFDocument.openPackage(complexFile);
-
- HWPFXML xml;
-
- // Simple file
- xml = new HWPFXML(
- HXFDocument.openPackage(sampleFile)
- );
- // Check it has key parts
- assertNotNull(xml.getDocument());
- assertNotNull(xml.getDocumentBody());
- assertNotNull(xml.getStyle());
-
- // Complex file
- xml = new HWPFXML(
- HXFDocument.openPackage(complexFile)
- );
- assertNotNull(xml.getDocument());
- assertNotNull(xml.getDocumentBody());
- assertNotNull(xml.getStyle());
- }
-
- public void testMetadataBasics() throws Exception {
- HWPFXML xml = new HWPFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication());
- assertEquals(1315, xml.getExtendedProperties().getCharacters());
- assertEquals(10, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-
- public void testMetadataComplex() throws Exception {
- HWPFXML xml = new HWPFXML(
- HXFDocument.openPackage(complexFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication());
- assertEquals(5184, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue());
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-
-import org.apache.poi.hwpf.HWPFXML;
-import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
-import org.apache.poi.hxf.HXFDocument;
-
-import junit.framework.TestCase;
-
-/**
- * Tests for HXFWordExtractor
- */
-public class TestHXFWordExtractor extends TestCase {
- /**
- * A very simple file
- */
- private HWPFXML xmlA;
- /**
- * A fairly complex file
- */
- private HWPFXML xmlB;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "sample.docx"
- );
- File fileB = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "IllustrativeCases.docx"
- );
-
- xmlA = new HWPFXML(HXFDocument.openPackage(fileA));
- xmlB = new HWPFXML(HXFDocument.openPackage(fileB));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFWordExtractor(xmlA.getPackage());
- new HXFWordExtractor(new HWPFXMLDocument(xmlA));
-
- HXFWordExtractor extractor =
- new HXFWordExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check contents
- assertTrue(text.startsWith(
- "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
- ));
- assertTrue(text.endsWith(
- "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(3, ps);
- }
-
- /**
- * Tests getting the text out of a complex file
- */
- public void testGetComplexText() throws Exception {
- HXFWordExtractor extractor =
- new HXFWordExtractor(xmlB.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- char euro = '\u20ac';
- System.err.println("'"+text.substring(text.length() - 20) + "'");
-
- // Check contents
- assertTrue(text.startsWith(
- " \n(V) ILLUSTRATIVE CASES\n\n"
- ));
- assertTrue(text.endsWith(
- "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(79, ps);
- }
-}
+++ /dev/null
-
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-
-package org.apache.poi.hxf;
-
-import junit.framework.TestCase;
-import java.io.*;
-
-/**
- * Class to test that HXF correctly detects OOXML
- * documents
- */
-public class TestDetectAsOOXML extends TestCase
-{
- public String dirname;
-
- public void setUp() {
- dirname = System.getProperty("HSSF.testdata.path");
- }
-
- public void testOpensProperly() throws Exception
- {
- File f = new File(dirname + "/sample.xlsx");
-
- HXFDocument.openPackage(f);
- }
-
- public void testDetectAsPOIFS() throws Exception {
- InputStream in;
-
- // ooxml file is
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.xlsx"), 10
- );
- assertTrue(HXFDocument.hasOOXMLHeader(in));
-
- // xls file isn't
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.xls"), 10
- );
- assertFalse(HXFDocument.hasOOXMLHeader(in));
-
- // text file isn't
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.txt"), 10
- );
- assertFalse(HXFDocument.hasOOXMLHeader(in));
- }
-}