From: Nick Burch Date: Sun, 9 Mar 2008 14:39:36 +0000 (+0000) Subject: Update the word code to the new style ooxml stuff X-Git-Tag: REL_3_5_BETA2~195 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0a06d325fa4bf4614adba9abe72ca6b2d44c3e5f;p=poi.git Update the word code to the new style ooxml stuff git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635253 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 4e190d7e77..606c0c3b80 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -123,6 +123,25 @@ public abstract class POIXMLDocument { PackageRelationship rel = rels.getRelationship(0); return getTargetPart(rel); } + + /** + * Retrieves all the PackageParts which are defined as + * relationships of the base document with the + * specified content type. + */ + protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { + PackageRelationshipCollection partsC = + getCorePart().getRelationshipsByType(contentType); + + PackagePart[] parts = new PackagePart[partsC.size()]; + int count = 0; + for (PackageRelationship rel : partsC) { + parts[count] = getTargetPart(rel); + count++; + } + return parts; + } + /** diff --git a/src/ooxml/java/org/apache/poi/xwpf/HWPFXML.java b/src/ooxml/java/org/apache/poi/xwpf/HWPFXML.java deleted file mode 100644 index 66bba7ee1b..0000000000 --- a/src/ooxml/java/org/apache/poi/xwpf/HWPFXML.java +++ /dev/null @@ -1,92 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. 
- The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf; - -import java.io.IOException; - -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; - -/** - * Experimental class to do low level processing - * of docx files. 
- * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HWPFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"; - public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"; - public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; - public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; - public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; - - private DocumentDocument wordDoc; - - public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - wordDoc = - DocumentDocument.Factory.parse(basePart.getInputStream()); - } - - /** - * Returns the low level document base object - */ - public CTDocument1 getDocument() { - return wordDoc.getDocument(); - } - - /** - * Returns the low level body of the document - */ - public CTBody getDocumentBody() { - return getDocument().getBody(); - } - - /** - * Returns the styles object used - */ - public CTStyles getStyle() throws XmlException, IOException { - PackagePart[] parts; - try { - parts = getRelatedByType(STYLES_RELATION_TYPE); - } catch(InvalidFormatException e) { - throw new IllegalStateException(e); - } - if(parts.length != 1) { - throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length); - } - - StylesDocument sd = - StylesDocument.Factory.parse(parts[0].getInputStream()); - return sd.getStyles(); - } -} diff 
--git a/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java new file mode 100644 index 0000000000..4b54ed8637 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/XWPFDocument.java @@ -0,0 +1,92 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf; + +import java.io.IOException; + +import org.apache.poi.POIXMLDocument; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.InvalidFormatException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; + +/** + * Experimental class to do low level processing + * of docx files. 
+ * + * If you are using these low level classes, then you + * will almost certainly need to refer to the OOXML + * specifications from + * http://www.ecma-international.org/publications/standards/Ecma-376.htm + * + * WARNING - APIs expected to change rapidly + */ +public class XWPFDocument extends POIXMLDocument { + public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"; + public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"; + public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; + public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; + public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; + + private DocumentDocument wordDoc; + + public XWPFDocument(Package container) throws OpenXML4JException, IOException, XmlException { + super(container); + + wordDoc = + DocumentDocument.Factory.parse(getCorePart().getInputStream()); + } + + /** + * Returns the low level document base object + */ + public CTDocument1 getDocument() { + return wordDoc.getDocument(); + } + + /** + * Returns the low level body of the document + */ + public CTBody getDocumentBody() { + return getDocument().getBody(); + } + + /** + * Returns the styles object used + */ + public CTStyles getStyle() throws XmlException, IOException { + PackagePart[] parts; + try { + parts = getRelatedByType(STYLES_RELATION_TYPE); + } catch(InvalidFormatException e) { + throw new IllegalStateException(e); + } + if(parts.length != 1) { + throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length); + } + + StylesDocument sd = + StylesDocument.Factory.parse(parts[0].getInputStream()); + return sd.getStyles(); + } +} diff --git 
a/src/ooxml/java/org/apache/poi/xwpf/extractor/HXFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/HXFWordExtractor.java deleted file mode 100644 index a4427e49ec..0000000000 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/HXFWordExtractor.java +++ /dev/null @@ -1,87 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.hwpf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hwpf.HWPFXML; -import org.apache.poi.hwpf.usermodel.HWPFXMLDocument; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; - -/** - * Helper class to extract text from an OOXML Word file - */ -public class HXFWordExtractor extends POIXMLTextExtractor { - private HWPFXMLDocument document; - - public HXFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HWPFXMLDocument( - new HWPFXML(container) - )); - } - public HXFWordExtractor(HWPFXMLDocument document) { - super(document); - this.document = document; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFWordExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFWordExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - public String getText() { - CTBody body = document._getHWPFXML().getDocumentBody(); - StringBuffer text = new StringBuffer(); - - // Loop over paragraphs - CTP[] ps = body.getPArray(); - for (int i = 0; i < ps.length; i++) { - // Loop over ranges - CTR[] rs = ps[i].getRArray(); - for (int j = 0; j < rs.length; j++) { - // Loop over text runs - CTText[] texts = rs[j].getTArray(); - for (int k = 0; k < texts.length; k++) { - text.append( - texts[k].getStringValue() - 
); - } - } - // New line after each paragraph. - text.append("\n"); - } - - return text.toString(); - } -} diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java new file mode 100644 index 0000000000..58fa4839c6 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -0,0 +1,84 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf.extractor; + +import java.io.File; +import java.io.IOException; + +import org.apache.poi.POIXMLDocument; +import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.xwpf.XWPFDocument; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; + +/** + * Helper class to extract text from an OOXML Word file + */ +public class XWPFWordExtractor extends POIXMLTextExtractor { + private XWPFDocument document; + + public XWPFWordExtractor(Package container) throws XmlException, OpenXML4JException, IOException { + this(new XWPFDocument(container)); + } + public XWPFWordExtractor(XWPFDocument document) { + super(document); + this.document = document; + } + + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" HXFWordExtractor "); + System.exit(1); + } + POIXMLTextExtractor extractor = + new XWPFWordExtractor(POIXMLDocument.openPackage( + args[0] + )); + System.out.println(extractor.getText()); + } + + public String getText() { + CTBody body = document.getDocumentBody(); + StringBuffer text = new StringBuffer(); + + // Loop over paragraphs + CTP[] ps = body.getPArray(); + for (int i = 0; i < ps.length; i++) { + // Loop over ranges + CTR[] rs = ps[i].getRArray(); + for (int j = 0; j < rs.length; j++) { + // Loop over text runs + CTText[] texts = rs[j].getTArray(); + for (int k = 0; k < texts.length; k++) { + text.append( + texts[k].getStringValue() + ); + } + } + // New line after each paragraph. 
+ text.append("\n"); + } + + return text.toString(); + } +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/HWPFXMLDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/HWPFXMLDocument.java deleted file mode 100644 index 64597e83dc..0000000000 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/HWPFXMLDocument.java +++ /dev/null @@ -1,36 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hwpf.HWPFXML; - -/** - * High level representation of a ooxml text document. 
- */ -public class HWPFXMLDocument extends POIXMLDocument { - private HWPFXML hwpfXML; - - public HWPFXMLDocument(HWPFXML xml) { - super(xml); - this.hwpfXML = xml; - } - - public HWPFXML _getHWPFXML() { - return hwpfXML; - } -} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XMLWordDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XMLWordDocument.java new file mode 100644 index 0000000000..27c988c6d7 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XMLWordDocument.java @@ -0,0 +1,34 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import org.apache.poi.xwpf.XWPFDocument; + +/** + * High level representation of a ooxml text document. 
+ */ +public class XMLWordDocument { + private XWPFDocument xwpfXML; + + public XMLWordDocument(XWPFDocument xml) { + this.xwpfXML = xml; + } + + public XWPFDocument _getXWPFXML() { + return xwpfXML; + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java deleted file mode 100644 index 3b2ba0746a..0000000000 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestHXFPowerPointExtractor.java +++ /dev/null @@ -1,109 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.hslf.extractor; - -import java.io.File; - -import org.apache.poi.hslf.HSLFXML; -import org.apache.poi.hslf.usermodel.HSLFXMLSlideShow; -import org.apache.poi.hxf.HXFDocument; - -import junit.framework.TestCase; - -/** - * Tests for HXFPowerPointExtractor - */ -public class TestHXFPowerPointExtractor extends TestCase { - /** - * A simple file - */ - private HSLFXML xmlA; - - protected void setUp() throws Exception { - super.setUp(); - - File fileA = new File( - System.getProperty("HSLF.testdata.path") + - File.separator + "sample.pptx" - ); - - xmlA = new HSLFXML(HXFDocument.openPackage(fileA)); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new HXFPowerPointExtractor(xmlA.getPackage()); - new HXFPowerPointExtractor(new XMLSlideShow(xmlA)); - - HXFPowerPointExtractor extractor = - new HXFPowerPointExtractor(xmlA.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check Basics - assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); - assertTrue(text.endsWith("amet\n\n")); - - // Just slides, no notes - text = extractor.getText(true, false); - assertEquals( - "Lorem ipsum dolor sit amet\n" + - "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + - "\n" + - "Lorem ipsum dolor sit amet\n" + - "Lorem\n" + - "ipsum\n" + - "dolor\n" + - "sit\n" + - "amet\n" + - "\n", text - ); - - // Just notes, no slides - text = extractor.getText(false, true); - assertEquals( - "\n\n\n\n", text - ); - - // Both - text = extractor.getText(true, true); - assertEquals( - "Lorem ipsum dolor sit amet\n" + - "Nunc at risus vel erat tempus posuere. 
Aenean non ante.\n" + - "\n\n\n" + - "Lorem ipsum dolor sit amet\n" + - "Lorem\n" + - "ipsum\n" + - "dolor\n" + - "sit\n" + - "amet\n" + - "\n\n\n", text - ); - - // Via set defaults - extractor.setSlidesByDefault(false); - extractor.setNotesByDefault(true); - text = extractor.getText(); - assertEquals( - "\n\n\n\n", text - ); - } -} diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java new file mode 100644 index 0000000000..fb10a2421c --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -0,0 +1,111 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xslf.extractor; + +import java.io.File; + +import org.apache.poi.POIXMLDocument; +import org.apache.poi.xslf.XSLFSlideShow; + +import junit.framework.TestCase; + +/** + * Tests for HXFPowerPointExtractor + */ +public class TestXSLFPowerPointExtractor extends TestCase { + /** + * A simple file + */ + private XSLFSlideShow xmlA; + private File fileA; + + protected void setUp() throws Exception { + super.setUp(); + + fileA = new File( + System.getProperty("HSLF.testdata.path") + + File.separator + "sample.pptx" + ); + assertTrue(fileA.exists()); + + xmlA = new XSLFSlideShow(fileA.toString()); + } + + /** + * Get text out of the simple file + */ + public void testGetSimpleText() throws Exception { + new XSLFPowerPointExtractor(xmlA); + new XSLFPowerPointExtractor( + POIXMLDocument.openPackage(fileA.toString())); + + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xmlA); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check Basics + assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); + assertTrue(text.endsWith("amet\n\n")); + + // Just slides, no notes + text = extractor.getText(true, false); + assertEquals( + "Lorem ipsum dolor sit amet\n" + + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + + "\n" + + "Lorem ipsum dolor sit amet\n" + + "Lorem\n" + + "ipsum\n" + + "dolor\n" + + "sit\n" + + "amet\n" + + "\n", text + ); + + // Just notes, no slides + text = extractor.getText(false, true); + assertEquals( + "\n\n\n\n", text + ); + + // Both + text = extractor.getText(true, true); + assertEquals( + "Lorem ipsum dolor sit amet\n" + + "Nunc at risus vel erat tempus posuere. 
Aenean non ante.\n" + + "\n\n\n" + + "Lorem ipsum dolor sit amet\n" + + "Lorem\n" + + "ipsum\n" + + "dolor\n" + + "sit\n" + + "amet\n" + + "\n\n\n", text + ); + + // Via set defaults + extractor.setSlidesByDefault(false); + extractor.setNotesByDefault(true); + text = extractor.getText(); + assertEquals( + "\n\n\n\n", text + ); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/TestHWPFXML.java b/src/ooxml/testcases/org/apache/poi/xwpf/TestHWPFXML.java deleted file mode 100644 index 0d8e196f43..0000000000 --- a/src/ooxml/testcases/org/apache/poi/xwpf/TestHWPFXML.java +++ /dev/null @@ -1,110 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-==================================================================== */ -package org.apache.poi.hwpf; - -import java.io.File; - -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; - -import junit.framework.TestCase; - -public class TestHWPFXML extends TestCase { - private File sampleFile; - private File complexFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "sample.docx" - ); - complexFile = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "IllustrativeCases.docx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - HXFDocument.openPackage(complexFile); - - HWPFXML xml; - - // Simple file - xml = new HWPFXML( - HXFDocument.openPackage(sampleFile) - ); - // Check it has key parts - assertNotNull(xml.getDocument()); - assertNotNull(xml.getDocumentBody()); - assertNotNull(xml.getStyle()); - - // Complex file - xml = new HWPFXML( - HXFDocument.openPackage(complexFile) - ); - assertNotNull(xml.getDocument()); - assertNotNull(xml.getDocumentBody()); - assertNotNull(xml.getStyle()); - } - - public void testMetadataBasics() throws Exception { - HWPFXML xml = new HWPFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication()); - assertEquals(1315, xml.getExtendedProperties().getCharacters()); - assertEquals(10, 
xml.getExtendedProperties().getLines()); - - assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } - - public void testMetadataComplex() throws Exception { - HWPFXML xml = new HWPFXML( - HXFDocument.openPackage(complexFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication()); - assertEquals(5184, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue()); - } -} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFDocument.java b/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFDocument.java new file mode 100644 index 0000000000..94127193fe --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xwpf/TestXWPFDocument.java @@ -0,0 +1,120 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.xwpf; + +import java.io.File; + +import org.apache.poi.POIXMLDocument; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; + +import junit.framework.TestCase; + +public class TestXWPFDocument extends TestCase { + private File sampleFile; + private File complexFile; + + protected void setUp() throws Exception { + super.setUp(); + + sampleFile = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "sample.docx" + ); + complexFile = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "IllustrativeCases.docx" + ); + + assertTrue(sampleFile.exists()); + assertTrue(complexFile.exists()); + } + + public void testContainsMainContentType() throws Exception { + Package pack = POIXMLDocument.openPackage(sampleFile.toString()); + + boolean found = false; + for(PackagePart part : pack.getParts()) { + if(part.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) { + found = true; + } + System.out.println(part); + } + assertTrue(found); + } + + public void testOpen() throws Exception { + POIXMLDocument.openPackage(sampleFile.toString()); + POIXMLDocument.openPackage(complexFile.toString()); + + new XWPFDocument( + POIXMLDocument.openPackage(sampleFile.toString()) + ); + new XWPFDocument( + POIXMLDocument.openPackage(complexFile.toString()) + ); + + XWPFDocument xml; + + // Simple file + xml = new XWPFDocument( + POIXMLDocument.openPackage(sampleFile.toString()) + ); + // Check it has key parts + assertNotNull(xml.getDocument()); + assertNotNull(xml.getDocumentBody()); + assertNotNull(xml.getStyle()); + + // Complex file + xml = new XWPFDocument( + POIXMLDocument.openPackage(complexFile.toString()) + ); + assertNotNull(xml.getDocument()); + assertNotNull(xml.getDocumentBody()); + assertNotNull(xml.getStyle()); + } + + public void testMetadataBasics() throws Exception { + XWPFDocument xml = new XWPFDocument( + 
POIXMLDocument.openPackage(sampleFile.toString()) + ); + assertNotNull(xml.getCoreProperties()); + assertNotNull(xml.getExtendedProperties()); + + assertEquals("Microsoft Office Word", xml.getExtendedProperties().getApplication()); + assertEquals(1315, xml.getExtendedProperties().getCharacters()); + assertEquals(10, xml.getExtendedProperties().getLines()); + + assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); + assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); + } + + public void testMetadataComplex() throws Exception { + XWPFDocument xml = new XWPFDocument( + POIXMLDocument.openPackage(complexFile.toString()) + ); + assertNotNull(xml.getCoreProperties()); + assertNotNull(xml.getExtendedProperties()); + + assertEquals("Microsoft Office Outlook", xml.getExtendedProperties().getApplication()); + assertEquals(5184, xml.getExtendedProperties().getCharacters()); + assertEquals(0, xml.getExtendedProperties().getLines()); + + assertEquals(" ", xml.getCoreProperties().getTitleProperty().getValue()); + assertEquals(" ", xml.getCoreProperties().getSubjectProperty().getValue()); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestHXFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestHXFWordExtractor.java deleted file mode 100644 index 62695b3a8c..0000000000 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestHXFWordExtractor.java +++ /dev/null @@ -1,117 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hwpf.extractor; - -import java.io.File; - -import org.apache.poi.hwpf.HWPFXML; -import org.apache.poi.hwpf.usermodel.HWPFXMLDocument; -import org.apache.poi.hxf.HXFDocument; - -import junit.framework.TestCase; - -/** - * Tests for HXFWordExtractor - */ -public class TestHXFWordExtractor extends TestCase { - /** - * A very simple file - */ - private HWPFXML xmlA; - /** - * A fairly complex file - */ - private HWPFXML xmlB; - - protected void setUp() throws Exception { - super.setUp(); - - File fileA = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "sample.docx" - ); - File fileB = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "IllustrativeCases.docx" - ); - - xmlA = new HWPFXML(HXFDocument.openPackage(fileA)); - xmlB = new HWPFXML(HXFDocument.openPackage(fileB)); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new HXFWordExtractor(xmlA.getPackage()); - new HXFWordExtractor(new HWPFXMLDocument(xmlA)); - - HXFWordExtractor extractor = - new HXFWordExtractor(xmlA.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check contents - assertTrue(text.startsWith( - "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio." - )); - assertTrue(text.endsWith( - "Phasellus ultricies mi nec leo. Sed tempus. 
In sit amet lorem at velit faucibus vestibulum.\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(3, ps); - } - - /** - * Tests getting the text out of a complex file - */ - public void testGetComplexText() throws Exception { - HXFWordExtractor extractor = - new HXFWordExtractor(xmlB.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - char euro = '\u20ac'; - System.err.println("'"+text.substring(text.length() - 20) + "'"); - - // Check contents - assertTrue(text.startsWith( - " \n(V) ILLUSTRATIVE CASES\n\n" - )); - assertTrue(text.endsWith( - "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(79, ps); - } -} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java new file mode 100644 index 0000000000..9a1f239bcc --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -0,0 +1,120 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.extractor; + +import java.io.File; + +import org.apache.poi.POIXMLDocument; +import org.apache.poi.xwpf.XWPFDocument; + +import junit.framework.TestCase; + +/** + * Tests for XWPFWordExtractor + */ +public class TestXWPFWordExtractor extends TestCase { + /** + * A very simple file + */ + private XWPFDocument xmlA; + private File fileA; + /** + * A fairly complex file + */ + private XWPFDocument xmlB; + private File fileB; + + protected void setUp() throws Exception { + super.setUp(); + + fileA = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "sample.docx" + ); + fileB = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "IllustrativeCases.docx" + ); + assertTrue(fileA.exists()); + assertTrue(fileB.exists()); + + xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString())); + xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString())); + } + + /** + * Get text out of the simple file + */ + public void testGetSimpleText() throws Exception { + new XWPFWordExtractor(xmlA); + new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString())); + + XWPFWordExtractor extractor = + new XWPFWordExtractor(xmlA); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check contents + assertTrue(text.startsWith( + "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante.
Suspendisse vehicula dolor sit amet odio." + )); + assertTrue(text.endsWith( + "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n" + )); + + // Check number of paragraphs + int ps = 0; + char[] t = text.toCharArray(); + for (int i = 0; i < t.length; i++) { + if(t[i] == '\n') { ps++; } + } + assertEquals(3, ps); + } + + /** + * Tests getting the text out of a complex file + */ + public void testGetComplexText() throws Exception { + XWPFWordExtractor extractor = + new XWPFWordExtractor(xmlB); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + char euro = '\u20ac'; + System.err.println("'"+text.substring(text.length() - 20) + "'"); + + // Check contents + assertTrue(text.startsWith( + " \n(V) ILLUSTRATIVE CASES\n\n" + )); + assertTrue(text.endsWith( + "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" + )); + + // Check number of paragraphs + int ps = 0; + char[] t = text.toCharArray(); + for (int i = 0; i < t.length; i++) { + if(t[i] == '\n') { ps++; } + } + assertEquals(79, ps); + } +}