From: Nick Burch Date: Tue, 5 Aug 2008 18:05:29 +0000 (+0000) Subject: Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extract... X-Git-Tag: REL_3_5_BETA2~26 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1559e6cc3ca90830e00c16a60ae2e7b284129c20;p=poi.git Fix bug #45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682843 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ac5d2f636a..0317cd9d27 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text 45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor 45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index a9e1cbdc6a..ce1869e2c0 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text 45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor 45431 - Support for .xlsm files, sufficient for simple files to be loaded by excel without warning New class org.apache.poi.hssf.record.RecordFormatException, which DDF uses instead of the HSSF version, and the HSSF version inherits from diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java index 0f28c2fdc0..4366f82e76 100644 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -16,18 +16,20 @@ ==================================================================== */ package org.apache.poi.xslf.extractor; -import java.io.File; import java.io.IOException; -import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; +import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; +import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; @@ -35,17 +37,20 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; public class XSLFPowerPointExtractor extends POIXMLTextExtractor { - private XSLFSlideShow slideshow; + private XMLSlideShow slideshow; private boolean slidesByDefault = true; private boolean notesByDefault = false; + public XSLFPowerPointExtractor(XMLSlideShow slideshow) { + super(slideshow._getXSLFSlideShow()); + this.slideshow = slideshow; + } + public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException { + this(new XMLSlideShow(slideshow)); + } public XSLFPowerPointExtractor(Package container) throws XmlException, OpenXML4JException, IOException { this(new XSLFSlideShow(container)); } - public XSLFPowerPointExtractor(XSLFSlideShow slideshow) { - super(slideshow); - this.slideshow = slideshow; - } public static void main(String[] args) throws Exception { if(args.length < 1) { @@ -88,18 +93,32 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { */ public String getText(boolean slideText, boolean notesText) { StringBuffer text = new StringBuffer(); - - CTSlideIdListEntry[] slideRefs = - slideshow.getSlideReferences().getSldIdArray(); - for (int i = 0; i < slideRefs.length; i++) { + + XSLFSlide[] slides = slideshow.getSlides(); + for(int i = 0; i < slides.length; i++) { + CTSlide rawSlide = slides[i]._getCTSlide(); + CTSlideIdListEntry slideId = slides[i]._getCTSlideId(); + try { - CTSlide slide = - slideshow.getSlide(slideRefs[i]); + // For now, still very low level CTNotesSlide notes = - slideshow.getNotes(slideRefs[i]); + slideshow._getXSLFSlideShow().getNotes(slideId); + CTCommentList comments = + slideshow._getXSLFSlideShow().getSlideComments(slideId); if(slideText) { - extractText(slide.getCSld().getSpTree(), text); + extractText(rawSlide.getCSld().getSpTree(), text); + + // Comments too for the slide + if(comments != null) { + for(CTComment comment : comments.getCmArray()) { + // TODO - comment authors too + // (They're in another stream) + text.append( + comment.getText() + "\n" + ); + } + } } if(notesText && notes != null) { extractText(notes.getCSld().getSpTree(), text); diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index fb10a2421c..ac31356374 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -108,4 +108,22 @@ public class TestXSLFPowerPointExtractor extends TestCase { "\n\n\n\n", text ); } + + public void testGetComments() throws Exception { + File file = new File( + System.getProperty("HSLF.testdata.path") + + File.separator + "45545_Comment.pptx" + ); + assertTrue(file.exists()); + + xmlA = new XSLFSlideShow(file.toString()); + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xmlA); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check comments are there + assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); + } }