PackageRelationship rel = rels.getRelationship(0);
return getTargetPart(rel);
}
+
+    /**
+     * Retrieves all the PackageParts which are the targets of
+     *  relationships of the core part with the specified
+     *  relationship type. (Despite the parameter's name, the
+     *  value is handed to getRelationshipsByType.)
+     */
+    protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
+        // Ask the core part for every relationship matching the given type
+        PackageRelationshipCollection matches =
+            getCorePart().getRelationshipsByType(contentType);
+
+        // Resolve each relationship to its target part, in iteration order
+        PackagePart[] targets = new PackagePart[matches.size()];
+        int next = 0;
+        for (PackageRelationship match : matches) {
+            targets[next++] = getTargetPart(match);
+        }
+        return targets;
+    }
+
/**
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf;
-
-import java.io.IOException;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
-
-/**
- * Experimental class to do low level processing
- * of docx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HWPFXML extends HXFDocument {
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
- public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
- public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
- public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
- public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
-
- private DocumentDocument wordDoc;
-
- public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- wordDoc =
- DocumentDocument.Factory.parse(basePart.getInputStream());
- }
-
- /**
- * Returns the low level document base object
- */
- public CTDocument1 getDocument() {
- return wordDoc.getDocument();
- }
-
- /**
- * Returns the low level body of the document
- */
- public CTBody getDocumentBody() {
- return getDocument().getBody();
- }
-
- /**
- * Returns the styles object used
- */
- public CTStyles getStyle() throws XmlException, IOException {
- PackagePart[] parts;
- try {
- parts = getRelatedByType(STYLES_RELATION_TYPE);
- } catch(InvalidFormatException e) {
- throw new IllegalStateException(e);
- }
- if(parts.length != 1) {
- throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
- }
-
- StylesDocument sd =
- StylesDocument.Factory.parse(parts[0].getInputStream());
- return sd.getStyles();
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf;
+
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+
+/**
+ * Experimental class to do low level processing
+ * of docx files.
+ *
+ * If you are using these low level classes, then you
+ * will almost certainly need to refer to the OOXML
+ * specifications from
+ * http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *
+ * WARNING - APIs expected to change rapidly
+ */
+public class XWPFDocument extends POIXMLDocument {
+    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
+    public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
+    public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
+    public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
+    public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
+
+    /** Parsed XML of the part returned by getCorePart() */
+    private DocumentDocument wordDoc;
+
+    /**
+     * Opens the supplied package and parses its core part
+     *  as a word processing document.
+     *
+     * @param container the package to read the document from
+     * @throws OpenXML4JException declared by the superclass constructor / package access
+     * @throws IOException if the core part cannot be read or its stream cannot be closed
+     * @throws XmlException if the core part is not parseable as a document
+     */
+    public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException {
+        super(container);
+
+        // Release the part's stream once parsing has finished,
+        //  whether or not the parse succeeded
+        java.io.InputStream in = getCorePart().getInputStream();
+        try {
+            wordDoc = DocumentDocument.Factory.parse(in);
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Returns the low level document base object
+     */
+    public CTDocument1 getDocument() {
+        return wordDoc.getDocument();
+    }
+
+    /**
+     * Returns the low level body of the document
+     */
+    public CTBody getDocumentBody() {
+        return getDocument().getBody();
+    }
+
+    /**
+     * Returns the styles object used. The styles part is
+     *  looked up and parsed afresh on every call.
+     *
+     * @throws IllegalStateException if the relationship lookup fails, or
+     *  if there is not exactly one part related by STYLES_RELATION_TYPE
+     * @throws XmlException if the styles part cannot be parsed
+     * @throws IOException if the styles part cannot be read or its stream cannot be closed
+     */
+    public CTStyles getStyle() throws XmlException, IOException {
+        PackagePart[] parts;
+        try {
+            parts = getRelatedByType(STYLES_RELATION_TYPE);
+        } catch(InvalidFormatException e) {
+            throw new IllegalStateException(e);
+        }
+        if(parts.length != 1) {
+            throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
+        }
+
+        // Close the styles stream after parsing instead of leaving it open
+        java.io.InputStream in = parts[0].getInputStream();
+        try {
+            StylesDocument sd = StylesDocument.Factory.parse(in);
+            return sd.getStyles();
+        } finally {
+            in.close();
+        }
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hwpf.HWPFXML;
-import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-
-/**
- * Helper class to extract text from an OOXML Word file
- */
-public class HXFWordExtractor extends POIXMLTextExtractor {
- private HWPFXMLDocument document;
-
- public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HWPFXMLDocument(
- new HWPFXML(container)
- ));
- }
- public HXFWordExtractor(HWPFXMLDocument document) {
- super(document);
- this.document = document;
- }
-
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFWordExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFWordExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- public String getText() {
- CTBody body = document._getHWPFXML().getDocumentBody();
- StringBuffer text = new StringBuffer();
-
- // Loop over paragraphs
- CTP[] ps = body.getPArray();
- for (int i = 0; i < ps.length; i++) {
- // Loop over ranges
- CTR[] rs = ps[i].getRArray();
- for (int j = 0; j < rs.length; j++) {
- // Loop over text runs
- CTText[] texts = rs[j].getTArray();
- for (int k = 0; k < texts.length; k++) {
- text.append(
- texts[k].getStringValue()
- );
- }
- }
- // New line after each paragraph.
- text.append("\n");
- }
-
- return text.toString();
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
+
+/**
+ * Helper class to extract text from an OOXML Word file
+ */
+public class XWPFWordExtractor extends POIXMLTextExtractor {
+    /** The document whose body text is extracted */
+    private XWPFDocument document;
+
+    /**
+     * Creates an extractor for the given package, by first
+     *  wrapping it in a new XWPFDocument.
+     */
+    public XWPFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
+        this(new XWPFDocument(container));
+    }
+
+    /**
+     * Creates an extractor for an already opened document.
+     */
+    public XWPFWordExtractor(XWPFDocument document) {
+        super(document);
+        this.document = document;
+    }
+
+    /**
+     * Command line entry point: prints the text of the file
+     *  named by the first argument to standard out. With no
+     *  arguments, prints a usage message and exits with status 1.
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            // Name this class and a Word file extension in the usage hint
+            System.err.println(" XWPFWordExtractor <filename.docx>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+            new XWPFWordExtractor(POIXMLDocument.openPackage(
+                args[0]
+            ));
+        System.out.println(extractor.getText());
+    }
+
+    /**
+     * Returns the text of every run of every paragraph in the
+     *  document body, with a newline appended after each paragraph.
+     */
+    public String getText() {
+        CTBody body = document.getDocumentBody();
+        // Method-local buffer, so the unsynchronized builder is sufficient
+        StringBuilder text = new StringBuilder();
+
+        // Loop over paragraphs
+        for (CTP paragraph : body.getPArray()) {
+            // Loop over the runs of this paragraph
+            for (CTR run : paragraph.getRArray()) {
+                // Loop over the text elements of this run
+                for (CTText t : run.getTArray()) {
+                    text.append(t.getStringValue());
+                }
+            }
+            // New line after each paragraph.
+            text.append("\n");
+        }
+
+        return text.toString();
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hwpf.HWPFXML;
-
-/**
- * High level representation of a ooxml text document.
- */
-public class HWPFXMLDocument extends POIXMLDocument {
- private HWPFXML hwpfXML;
-
- public HWPFXMLDocument(HWPFXML xml) {
- super(xml);
- this.hwpfXML = xml;
- }
-
- public HWPFXML _getHWPFXML() {
- return hwpfXML;
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.apache.poi.xwpf.XWPFDocument;
+
+/**
+ * High level representation of a ooxml text document.
+ */
+public class XMLWordDocument {
+    /** The low level document this object wraps */
+    private XWPFDocument xwpfXML;
+
+    /**
+     * Wraps the supplied low level document.
+     */
+    public XMLWordDocument(XWPFDocument xml) {
+        xwpfXML = xml;
+    }
+
+    /**
+     * Returns the low level document that was supplied
+     *  at construction time.
+     */
+    public XWPFDocument _getXWPFXML() {
+        return this.xwpfXML;
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hslf.extractor;
-
-import java.io.File;
-
-import org.apache.poi.hslf.HSLFXML;
-import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow;
-import org.apache.poi.hxf.HXFDocument;
-
-import junit.framework.TestCase;
-
-/**
- * Tests for HXFPowerPointExtractor
- */
-public class TestHXFPowerPointExtractor extends TestCase {
- /**
- * A simple file
- */
- private HSLFXML xmlA;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HSLF.testdata.path") +
- File.separator + "sample.pptx"
- );
-
- xmlA = new HSLFXML(HXFDocument.openPackage(fileA));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFPowerPointExtractor(xmlA.getPackage());
- new HXFPowerPointExtractor(new XMLSlideShow(xmlA));
-
- HXFPowerPointExtractor extractor =
- new HXFPowerPointExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check Basics
- assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
- assertTrue(text.endsWith("amet\n\n"));
-
- // Just slides, no notes
- text = extractor.getText(true, false);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n", text
- );
-
- // Just notes, no slides
- text = extractor.getText(false, true);
- assertEquals(
- "\n\n\n\n", text
- );
-
- // Both
- text = extractor.getText(true, true);
- assertEquals(
- "Lorem ipsum dolor sit amet\n" +
- "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
- "\n\n\n" +
- "Lorem ipsum dolor sit amet\n" +
- "Lorem\n" +
- "ipsum\n" +
- "dolor\n" +
- "sit\n" +
- "amet\n" +
- "\n\n\n", text
- );
-
- // Via set defaults
- extractor.setSlidesByDefault(false);
- extractor.setNotesByDefault(true);
- text = extractor.getText();
- assertEquals(
- "\n\n\n\n", text
- );
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xslf.extractor;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.xslf.XSLFSlideShow;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for XSLFPowerPointExtractor
+ */
+public class TestXSLFPowerPointExtractor extends TestCase {
+    /**
+     * Low level slideshow opened from the simple file
+     */
+    private XSLFSlideShow xmlA;
+    /**
+     * Location of the simple file
+     */
+    private File fileA;
+
+    /**
+     * Locates sample.pptx below the directory named by the
+     *  HSLF.testdata.path system property, and opens it.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        String dataDir = System.getProperty("HSLF.testdata.path");
+        fileA = new File(dataDir + File.separator + "sample.pptx");
+        assertTrue(fileA.exists());
+
+        xmlA = new XSLFSlideShow(fileA.toString());
+    }
+
+    /**
+     * Get text out of the simple file, with each combination
+     *  of slide text and notes text switched on and off
+     */
+    public void testGetSimpleText() throws Exception {
+        // Text expected from each of the two slides
+        String firstSlide =
+            "Lorem ipsum dolor sit amet\n" +
+            "Nunc at risus vel erat tempus posuere. Aenean non ante.\n";
+        String secondSlide =
+            "Lorem ipsum dolor sit amet\n" +
+            "Lorem\n" +
+            "ipsum\n" +
+            "dolor\n" +
+            "sit\n" +
+            "amet\n";
+        // What comes back when only the notes are requested
+        String notesOnly = "\n\n\n\n";
+
+        // Both ways of constructing the extractor should work
+        new XSLFPowerPointExtractor(xmlA);
+        new XSLFPowerPointExtractor(
+            POIXMLDocument.openPackage(fileA.toString()));
+
+        XSLFPowerPointExtractor extractor =
+            new XSLFPowerPointExtractor(xmlA);
+        extractor.getText();
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check Basics
+        assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
+        assertTrue(text.endsWith("amet\n\n"));
+
+        // Just slides, no notes
+        text = extractor.getText(true, false);
+        assertEquals(firstSlide + "\n" + secondSlide + "\n", text);
+
+        // Just notes, no slides
+        text = extractor.getText(false, true);
+        assertEquals(notesOnly, text);
+
+        // Both
+        text = extractor.getText(true, true);
+        assertEquals(firstSlide + "\n\n\n" + secondSlide + "\n\n\n", text);
+
+        // Via set defaults
+        extractor.setSlidesByDefault(false);
+        extractor.setNotesByDefault(true);
+        text = extractor.getText();
+        assertEquals(notesOnly, text);
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf;
-
-import java.io.File;
-
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-
-import junit.framework.TestCase;
-
-public class TestHWPFXML extends TestCase {
- private File sampleFile;
- private File complexFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "sample.docx"
- );
- complexFile = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "IllustrativeCases.docx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
- HXFDocument.openPackage(complexFile);
-
- HWPFXML xml;
-
- // Simple file
- xml = new HWPFXML(
- HXFDocument.openPackage(sampleFile)
- );
- // Check it has key parts
- assertNotNull(xml.getDocument());
- assertNotNull(xml.getDocumentBody());
- assertNotNull(xml.getStyle());
-
- // Complex file
- xml = new HWPFXML(
- HXFDocument.openPackage(complexFile)
- );
- assertNotNull(xml.getDocument());
- assertNotNull(xml.getDocumentBody());
- assertNotNull(xml.getStyle());
- }
-
- public void testMetadataBasics() throws Exception {
- HWPFXML xml = new HWPFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication());
- assertEquals(1315, xml.getExtendedProperties().getCharacters());
- assertEquals(10, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-
- public void testMetadataComplex() throws Exception {
- HWPFXML xml = new HWPFXML(
- HXFDocument.openPackage(complexFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication());
- assertEquals(5184, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue());
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+public class TestXWPFDocument extends TestCase {
+    /** The simple test document, sample.docx */
+    private File sampleFile;
+    /** The more involved test document, IllustrativeCases.docx */
+    private File complexFile;
+
+    /**
+     * Locates both test documents below the directory named by
+     *  the HWPF.testdata.path system property.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        String dataDir = System.getProperty("HWPF.testdata.path");
+        sampleFile = new File(dataDir + File.separator + "sample.docx");
+        complexFile = new File(dataDir + File.separator + "IllustrativeCases.docx");
+
+        assertTrue(sampleFile.exists());
+        assertTrue(complexFile.exists());
+    }
+
+    /**
+     * Opens the package stored in the given file.
+     */
+    private static Package open(File file) throws Exception {
+        return POIXMLDocument.openPackage(file.toString());
+    }
+
+    /**
+     * The sample package must contain a part with the main content type
+     */
+    public void testContainsMainContentType() throws Exception {
+        Package pack = open(sampleFile);
+
+        boolean found = false;
+        for(PackagePart part : pack.getParts()) {
+            if(part.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) {
+                found = true;
+            }
+            System.out.println(part);
+        }
+        assertTrue(found);
+    }
+
+    /**
+     * Both files open as packages and as documents, and the
+     *  documents expose their key parts
+     */
+    public void testOpen() throws Exception {
+        // Plain package opening
+        open(sampleFile);
+        open(complexFile);
+
+        // Document construction
+        new XWPFDocument(open(sampleFile));
+        new XWPFDocument(open(complexFile));
+
+        // Simple file - check it has key parts
+        XWPFDocument xml = new XWPFDocument(open(sampleFile));
+        assertNotNull(xml.getDocument());
+        assertNotNull(xml.getDocumentBody());
+        assertNotNull(xml.getStyle());
+
+        // Complex file
+        xml = new XWPFDocument(open(complexFile));
+        assertNotNull(xml.getDocument());
+        assertNotNull(xml.getDocumentBody());
+        assertNotNull(xml.getStyle());
+    }
+
+    /**
+     * Core and extended properties of the simple file
+     */
+    public void testMetadataBasics() throws Exception {
+        XWPFDocument xml = new XWPFDocument(open(sampleFile));
+        assertNotNull(xml.getCoreProperties());
+        assertNotNull(xml.getExtendedProperties());
+
+        assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication());
+        assertEquals(1315, xml.getExtendedProperties().getCharacters());
+        assertEquals(10, xml.getExtendedProperties().getLines());
+
+        assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
+        assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
+    }
+
+    /**
+     * Core and extended properties of the complex file
+     */
+    public void testMetadataComplex() throws Exception {
+        XWPFDocument xml = new XWPFDocument(open(complexFile));
+        assertNotNull(xml.getCoreProperties());
+        assertNotNull(xml.getExtendedProperties());
+
+        assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication());
+        assertEquals(5184, xml.getExtendedProperties().getCharacters());
+        assertEquals(0, xml.getExtendedProperties().getLines());
+
+        assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue());
+        assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue());
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hwpf.extractor;
-
-import java.io.File;
-
-import org.apache.poi.hwpf.HWPFXML;
-import org.apache.poi.hwpf.usermodel.HWPFXMLDocument;
-import org.apache.poi.hxf.HXFDocument;
-
-import junit.framework.TestCase;
-
-/**
- * Tests for HXFWordExtractor
- */
-public class TestHXFWordExtractor extends TestCase {
- /**
- * A very simple file
- */
- private HWPFXML xmlA;
- /**
- * A fairly complex file
- */
- private HWPFXML xmlB;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "sample.docx"
- );
- File fileB = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "IllustrativeCases.docx"
- );
-
- xmlA = new HWPFXML(HXFDocument.openPackage(fileA));
- xmlB = new HWPFXML(HXFDocument.openPackage(fileB));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFWordExtractor(xmlA.getPackage());
- new HXFWordExtractor(new HWPFXMLDocument(xmlA));
-
- HXFWordExtractor extractor =
- new HXFWordExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check contents
- assertTrue(text.startsWith(
- "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
- ));
- assertTrue(text.endsWith(
- "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(3, ps);
- }
-
- /**
- * Tests getting the text out of a complex file
- */
- public void testGetComplexText() throws Exception {
- HXFWordExtractor extractor =
- new HXFWordExtractor(xmlB.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- char euro = '\u20ac';
- System.err.println("'"+text.substring(text.length() - 20) + "'");
-
- // Check contents
- assertTrue(text.startsWith(
- " \n(V) ILLUSTRATIVE CASES\n\n"
- ));
- assertTrue(text.endsWith(
- "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(79, ps);
- }
-}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.extractor;
+
+import java.io.File;
+
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.xwpf.XWPFDocument;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for XWPFWordExtractor.
+ *
+ * The fixtures are read from the directory named by the
+ * "HWPF.testdata.path" system property.
+ */
+public class TestXWPFWordExtractor extends TestCase {
+    /**
+     * A very simple file
+     */
+    private XWPFDocument xmlA;
+    private File fileA;
+    /**
+     * A fairly complex file
+     */
+    private XWPFDocument xmlB;
+    private File fileB;
+
+    /**
+     * Locates both sample documents, fails early if either is
+     * missing, and opens each one as an XWPFDocument.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        fileA = new File(
+                System.getProperty("HWPF.testdata.path") +
+                File.separator + "sample.docx"
+        );
+        fileB = new File(
+                System.getProperty("HWPF.testdata.path") +
+                File.separator + "IllustrativeCases.docx"
+        );
+        // Report a missing fixture here rather than as an obscure
+        // failure while opening the package
+        assertTrue(fileA.exists());
+        assertTrue(fileB.exists());
+
+        xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
+        xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
+    }
+
+    /**
+     * Counts the '\n' characters in the given text. The tests
+     * use this as their paragraph count.
+     */
+    private static int countNewlines(String text) {
+        int newlines = 0;
+        for (int i = 0; i < text.length(); i++) {
+            if (text.charAt(i) == '\n') {
+                newlines++;
+            }
+        }
+        return newlines;
+    }
+
+    /**
+     * Get text out of the simple file
+     */
+    public void testGetSimpleText() throws Exception {
+        // Both constructors (from a document, and from a
+        // package) must be usable without throwing
+        new XWPFWordExtractor(xmlA);
+        new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
+
+        XWPFWordExtractor extractor =
+            new XWPFWordExtractor(xmlA);
+        // getText() is deliberately invoked twice; the first result
+        // is discarded (presumably to exercise repeated extraction)
+        extractor.getText();
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check contents
+        assertTrue(text.startsWith(
+                "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
+        ));
+        assertTrue(text.endsWith(
+                "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
+        ));
+
+        // Check number of paragraphs
+        assertEquals(3, countNewlines(text));
+    }
+
+    /**
+     * Tests getting the text out of a complex file
+     */
+    public void testGetComplexText() throws Exception {
+        XWPFWordExtractor extractor =
+            new XWPFWordExtractor(xmlB);
+        // getText() is deliberately invoked twice; the first result
+        // is discarded (presumably to exercise repeated extraction)
+        extractor.getText();
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        char euro = '\u20ac';
+
+        // Check contents
+        assertTrue(text.startsWith(
+                " \n(V) ILLUSTRATIVE CASES\n\n"
+        ));
+        assertTrue(text.endsWith(
+                "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
+        ));
+
+        // Check number of paragraphs
+        assertEquals(79, countNewlines(text));
+    }
+}