import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.XSLFSlideShow;
import org.apache.poi.xslf.usermodel.DrawingParagraph;
+import org.apache.poi.xslf.usermodel.DrawingTextBody;
+import org.apache.poi.xslf.usermodel.DrawingTextPlaceholder;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
import org.apache.poi.xslf.usermodel.XSLFComments;
import org.apache.poi.xslf.usermodel.XSLFNotes;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFSlideLayout;
import org.apache.poi.xslf.usermodel.XSLFSlideMaster;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
try {
XSLFNotes notes = slide.getNotes();
XSLFComments comments = slide.getComments();
+ XSLFSlideLayout layout = slide.getSlideLayout();
XSLFSlideMaster master = slide.getMasterSheet();
// TODO Do the slide's name
// Do the slide's text if requested
if (slideText) {
- extractText(slide.getCommonSlideData(), text);
+ extractText(slide.getCommonSlideData(), false, text);
- // If there's a master sheet and it's requested, grab text from there
- if(masterText && master != null) {
- extractText(master.getCommonSlideData(), text);
+ // If requested, get text from the master and its layout
+ if(masterText) {
+ if(layout != null) {
+ extractText(layout.getCommonSlideData(), true, text);
+ }
+ if(master != null) {
+ extractText(master.getCommonSlideData(), true, text);
+ }
}
// If the slide has comments, do those too
// Do the notes if requested
if (notesText && notes != null) {
- extractText(notes.getCommonSlideData(), text);
+ extractText(notes.getCommonSlideData(), false, text);
}
} catch (Exception e) {
throw new RuntimeException(e);
return text.toString();
}
- private void extractText(XSLFCommonSlideData data, StringBuffer text) {
- for (DrawingParagraph p : data.getText()) {
+ /**
+ * Appends the text of every paragraph found in the given slide data to
+ * the buffer, each paragraph followed by a newline.
+ *
+ * @param data the common slide data whose drawing text bodies are walked
+ * @param skipPlaceholders if true, any text body that is a
+ * {@link DrawingTextPlaceholder} and does not report
+ * {@link DrawingTextPlaceholder#isPlaceholderCustom()} is left out
+ * @param text the buffer the paragraph text is appended to
+ */
+ private void extractText(XSLFCommonSlideData data, boolean skipPlaceholders, StringBuffer text) {
+ for(DrawingTextBody textBody : data.getDrawingText()) {
+ if(skipPlaceholders && textBody instanceof DrawingTextPlaceholder) {
+ DrawingTextPlaceholder ph = (DrawingTextPlaceholder)textBody;
+ if(! ph.isPlaceholderCustom()) {
+ // Skip non-customised placeholder text
+ continue;
+ }
+ }
+
+ // Everything that was not skipped above contributes all of its paragraphs
+ for (DrawingParagraph p : textBody.getParagraphs()) {
text.append(p.getText());
text.append("\n");
- }
- }
+ }
+ }
+ }
}
package org.apache.poi.xslf.usermodel;
+import java.util.List;
+
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import java.util.List;
-
public class DrawingTextBody {
private final CTTextBody textBody;
public DrawingTextBody(CTTextBody textBody) {
- this.textBody = textBody;
+ this.textBody = textBody;
}
public DrawingParagraph[] getParagraphs() {
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTPlaceholder;
+import org.openxmlformats.schemas.presentationml.x2006.main.STPlaceholderType;
+
+/**
+ * A {@link DrawingTextBody} which is a placeholder. In addition to the
+ * text body it keeps the {@link CTPlaceholder} it was created with, and
+ * exposes that placeholder's type and custom prompt flag.
+ * @author nick
+ *
+ */
+public class DrawingTextPlaceholder extends DrawingTextBody {
+ // The placeholder definition all of the accessors below read from
+ private final CTPlaceholder placeholder;
+
+ /**
+ * @param textBody the text body, handed on to {@link DrawingTextBody}
+ * @param placeholder the placeholder definition queried by this class
+ */
+ public DrawingTextPlaceholder(CTTextBody textBody, CTPlaceholder placeholder) {
+ super(textBody);
+ this.placeholder = placeholder;
+ }
+
+ /**
+ * What kind of placeholder is this?
+ * Returns the string form (toString) of the placeholder's type.
+ * NOTE(review): assumes the placeholder's getType() never returns
+ * null here - confirm against the schema default.
+ */
+ public String getPlaceholderType() {
+ return placeholder.getType().toString();
+ }
+
+ /**
+ * What kind of placeholder is this?
+ * Returns the placeholder's type as the schema enum value, unconverted.
+ */
+ public STPlaceholderType.Enum getPlaceholderTypeEnum() {
+ return placeholder.getType();
+ }
+
+ /**
+ * Is the PlaceHolder text customised?
+ * Returns the value of the placeholder's hasCustomPrompt flag.
+ */
+ public boolean isPlaceholderCustom() {
+ return placeholder.getHasCustomPrompt();
+ }
+}
import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTApplicationNonVisualDrawingProps;
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
public XSLFCommonSlideData(CTCommonSlideData data) {
this.data = data;
}
-
- public List<DrawingParagraph> getText() {
+
+ public List<DrawingTextBody> getDrawingText() {
CTGroupShape gs = data.getSpTree();
- List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
+ List<DrawingTextBody> out = new ArrayList<DrawingTextBody>();
processShape(gs, out);
for (DrawingTableRow row : table.getRows()) {
for (DrawingTableCell cell : row.getCells()) {
DrawingTextBody textBody = cell.getTextBody();
-
- out.addAll(Arrays.asList(textBody.getParagraphs()));
+ out.add(textBody);
}
}
}
return out;
}
+ /**
+ * Returns the paragraphs of every text body reported by
+ * {@link #getDrawingText()}, flattened into a single new list in the
+ * order the text bodies are returned.
+ */
+ public List<DrawingParagraph> getText() {
+ List<DrawingParagraph> paragraphs = new ArrayList<DrawingParagraph>();
+ for(DrawingTextBody textBody : getDrawingText()) {
+ paragraphs.addAll(Arrays.asList(textBody.getParagraphs()));
+ }
+ return paragraphs;
+ }
- private void processShape(CTGroupShape gs, List<DrawingParagraph> out) {
+ /**
+ * Adds one {@link DrawingTextBody} to the output list for each shape in
+ * the group's shape list that has a text body. Shapes without a text
+ * body are skipped.
+ *
+ * @param gs the group shape whose shape list is walked
+ * @param out the list the text bodies are added to
+ */
+ private void processShape(CTGroupShape gs, List<DrawingTextBody> out) {
List<CTShape> shapes = gs.getSpList();
- for (int i = 0; i < shapes.size(); i++) {
- CTTextBody ctTextBody = shapes.get(i).getTxBody();
+ for (CTShape shape : shapes) {
+ CTTextBody ctTextBody = shape.getTxBody();
if (ctTextBody==null) {
continue;
}
+
+ DrawingTextBody textBody;
+ CTApplicationNonVisualDrawingProps nvpr = shape.getNvSpPr().getNvPr();
+ // Shapes with a placeholder set get the placeholder-aware wrapper,
+ // so that callers can tell them apart from ordinary text
+ if(nvpr.isSetPh()) {
+ textBody = new DrawingTextPlaceholder(ctTextBody, nvpr.getPh());
+ } else {
+ textBody = new DrawingTextBody(ctTextBody);
+ }
- DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
-
- out.addAll(Arrays.asList(textBody.getParagraphs()));
+ out.add(textBody);
}
}
-
}
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.contains("amet\n\n"));
- // Our master text, for tests
+ // Our placeholder master text
+ // This shouldn't show up in the output
String masterText =
"Click to edit Master title style\n" +
+ "Click to edit Master subtitle style\n" +
+ "\n\n\n\n\n\n" +
+ "Click to edit Master title style\n" +
"Click to edit Master text styles\n" +
"Second level\n" +
"Third level\n" +
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
- masterText +
- "\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
- "\n" +
- masterText +
- "\n\n\n"
+ "\n"
, text
);
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
- masterText +
- "\n\n\n\n\n" +
+ "\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
- "\n" +
- masterText +
- "\n\n\n\n\n"
+ "\n\n\n"
, text
);
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
+ extractor.setSlidesByDefault(true);
+ extractor.setNotesByDefault(false);
+ extractor.setMasterByDefault(true);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check master text is there
assertTrue("Unable to find expected word in text\n" + text,
text.contains("Footer from the master slide"));
+
+ // Theme text shouldn't show up
+ String themeText =
+ "Theme Master Title\n" +
+ "Theme Master first level\n" +
+ "And the 2nd level\n" +
+ "Our 3rd level goes here\n" +
+ "And onto the 4th, such fun….\n" +
+ "Finally is the Fifth level\n";
// Check the whole text
assertEquals(
"First page title\n" +
"First page subtitle\n" +
-// "This text comes from the Master Slide\n" + // TODO
-// "This is the Master Title\n" + // TODO
- "\n" + // TODO Should be the above
+ "This is the Master Title\n" +
+ "This text comes from the Master Slide\n" +
+ "\n" +
+ // TODO Detect we didn't have a title, and include the master one
"2nd page subtitle\n" +
-// "This text comes from the Master Slide\n" + // TODO
- "Footer from the master slide\n"
+ "Footer from the master slide\n" +
+ "This is the Master Title\n" +
+ "This text comes from the Master Slide\n"
, text
);
}