==================================================================== */
package org.apache.poi.xslf.extractor;
-import java.io.IOException;
-
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.xmlbeans.XmlException;
-import org.apache.xmlbeans.XmlObject;
-import org.apache.xmlbeans.XmlCursor;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
-import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
-import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
+import org.openxmlformats.schemas.presentationml.x2006.main.*;
+
+import java.io.IOException;
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
private XMLSlideShow slideshow;
slideshow._getXSLFSlideShow().getSlideComments(slideId);
if(slideText) {
- extractText(rawSlide.getCSld().getSpTree(), text);
+ extractText(slides[i].getCommonSlideData(), text);
// Comments too for the slide
if(comments != null) {
}
}
}
+
if(notesText && notes != null) {
- extractText(notes.getCSld().getSpTree(), text);
+ extractText(new XSLFCommonSlideData(notes.getCSld()), text);
}
} catch(Exception e) {
throw new RuntimeException(e);
return text.toString();
}
- private void extractText(CTGroupShape gs, StringBuffer text) {
- CTShape[] shapes = gs.getSpArray();
- for (int i = 0; i < shapes.length; i++) {
- CTTextBody textBody =
- shapes[i].getTxBody();
- if(textBody != null) {
- CTTextParagraph[] paras =
- textBody.getPArray();
- for (int j = 0; j < paras.length; j++) {
- XmlCursor c = paras[j].newCursor();
- c.selectPath("./*");
- while (c.toNextSelection()) {
- XmlObject o = c.getObject();
- if(o instanceof CTRegularTextRun){
- CTRegularTextRun txrun = (CTRegularTextRun)o;
- text.append( txrun.getT() );
- } else if (o instanceof CTTextLineBreak){
- text.append('\n');
- }
- }
-
- // End each paragraph with a new line
- text.append("\n");
- }
- }
- }
- }
+ /**
+  * Appends the text of every paragraph exposed by the given slide data
+  * to the buffer, terminating each paragraph with a newline.
+  *
+  * @param data the common slide data whose paragraphs are read
+  * @param text the buffer the extracted text is appended to
+  */
+ private void extractText(XSLFCommonSlideData data, StringBuffer text) {
+     for (DrawingParagraph paragraph : data.getText()) {
+         // End each paragraph with a new line
+         text.append(paragraph.getText()).append("\n");
+     }
+ }
}
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak;
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+
+/**
+ * Thin wrapper around a DrawingML {@code CTTextParagraph} that exposes the
+ * paragraph's plain text.
+ */
+public class DrawingParagraph {
+    private final CTTextParagraph p;
+
+    /**
+     * @param p the underlying XML paragraph bean the text is read from
+     */
+    public DrawingParagraph(CTTextParagraph p) {
+        this.p = p;
+    }
+
+    /**
+     * Builds the paragraph text by walking the paragraph's direct child
+     * elements in document order: regular text runs contribute their text,
+     * line breaks contribute a {@code '\n'}, and any other child is ignored.
+     *
+     * @return the concatenated text of the paragraph (never {@code null})
+     */
+    public CharSequence getText() {
+        StringBuilder text = new StringBuilder();
+
+        XmlCursor c = p.newCursor();
+        try {
+            c.selectPath("./*");
+            while (c.toNextSelection()) {
+                XmlObject o = c.getObject();
+                if (o instanceof CTRegularTextRun) {
+                    CTRegularTextRun txrun = (CTRegularTextRun) o;
+                    text.append(txrun.getT());
+                } else if (o instanceof CTTextLineBreak) {
+                    text.append('\n');
+                }
+            }
+        } finally {
+            // Release the cursor even if traversal fails; an undisposed
+            // cursor keeps its resources alive until garbage collection.
+            c.dispose();
+        }
+
+        return text;
+    }
+}
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+
+/**
+ * Wrapper around a DrawingML {@code CTTable} giving access to its rows.
+ */
+public class DrawingTable {
+    private final CTTable table;
+
+    /**
+     * @param table the underlying XML table bean
+     */
+    public DrawingTable(CTTable table) {
+        this.table = table;
+    }
+
+    /**
+     * Wraps each row of the underlying table, preserving their order.
+     *
+     * @return a newly allocated array with one wrapper per table row
+     */
+    public DrawingTableRow[] getRows() {
+        CTTableRow[] xmlRows = table.getTrArray();
+        DrawingTableRow[] rows = new DrawingTableRow[xmlRows.length];
+
+        int idx = 0;
+        for (CTTableRow xmlRow : xmlRows) {
+            rows[idx++] = new DrawingTableRow(xmlRow);
+        }
+
+        return rows;
+    }
+}
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+/**
+ * Wrapper around a DrawingML {@code CTTableCell} giving access to the
+ * cell's text body.
+ */
+public class DrawingTableCell {
+    private final CTTableCell cell;
+    private final DrawingTextBody drawingTextBody;
+
+    /**
+     * Stores the cell and eagerly wraps whatever {@code getTxBody()}
+     * returns for it.
+     *
+     * @param cell the underlying XML table cell bean
+     */
+    public DrawingTableCell(CTTableCell cell) {
+        this.cell = cell;
+        this.drawingTextBody = new DrawingTextBody(cell.getTxBody());
+    }
+
+    /**
+     * @return the wrapper created for this cell's text body at construction time
+     */
+    public DrawingTextBody getTextBody() {
+        return this.drawingTextBody;
+    }
+}
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTableCell;
+
+/**
+ * Wrapper around a DrawingML {@code CTTableRow} giving access to its cells.
+ */
+public class DrawingTableRow {
+    private final CTTableRow row;
+
+    /**
+     * @param row the underlying XML table row bean
+     */
+    public DrawingTableRow(CTTableRow row) {
+        this.row = row;
+    }
+
+    /**
+     * Wraps each cell of the underlying row, preserving their order.
+     *
+     * @return a newly allocated array with one wrapper per table cell
+     */
+    public DrawingTableCell[] getCells() {
+        CTTableCell[] xmlCells = row.getTcArray();
+        DrawingTableCell[] cells = new DrawingTableCell[xmlCells.length];
+
+        int idx = 0;
+        for (CTTableCell xmlCell : xmlCells) {
+            cells[idx++] = new DrawingTableCell(xmlCell);
+        }
+
+        return cells;
+    }
+}
\ No newline at end of file
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
+
+/**
+ * Wrapper around a DrawingML {@code CTTextBody} giving access to its
+ * paragraphs.
+ */
+public class DrawingTextBody {
+    private final CTTextBody textBody;
+
+    /**
+     * @param textBody the underlying XML text body bean; may be
+     *        {@code null} when the owning element carries no text body
+     */
+    public DrawingTextBody(CTTextBody textBody) {
+        this.textBody = textBody;
+    }
+
+    /**
+     * Wraps each paragraph of the underlying text body, preserving order.
+     *
+     * @return a newly allocated array with one wrapper per paragraph;
+     *         empty when this object wraps no text body
+     */
+    public DrawingParagraph[] getParagraphs() {
+        // A table cell may legitimately have no text body, in which case
+        // the wrapper is built around null; treat that as "no paragraphs"
+        // instead of failing with a NullPointerException.
+        if (textBody == null) {
+            return new DrawingParagraph[0];
+        }
+
+        CTTextParagraph[] pArray = textBody.getPArray();
+        DrawingParagraph[] o = new DrawingParagraph[pArray.length];
+
+        for (int i=0; i<o.length; i++) {
+            o[i] = new DrawingParagraph(pArray[i]);
+        }
+
+        return o;
+    }
+}
--- /dev/null
+package org.apache.poi.xslf.usermodel;
+
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTGraphicalObjectData;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTable;
+import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTCommonSlideData;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGraphicalObjectFrame;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
+import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Wrapper around a PresentationML {@code CTCommonSlideData} that collects
+ * the text paragraphs found in its shape tree.
+ */
+public class XSLFCommonSlideData {
+    private final CTCommonSlideData data;
+
+    /**
+     * @param data the underlying common slide data XML bean
+     */
+    public XSLFCommonSlideData(CTCommonSlideData data) {
+        this.data = data;
+    }
+
+    /**
+     * Collects the paragraphs of this slide data's shape tree: first the
+     * paragraphs of every shape that has a text body, then the paragraphs
+     * of every cell of every table found inside a graphic frame.
+     * Only shapes and graphic frames directly in the shape tree are visited.
+     *
+     * @return a new, mutable list of paragraphs in the order described above
+     */
+    public List<DrawingParagraph> getText() {
+        CTGroupShape gs = data.getSpTree();
+
+        List<DrawingParagraph> out = new ArrayList<DrawingParagraph>();
+
+        for (CTShape shape : gs.getSpArray()) {
+            CTTextBody ctTextBody = shape.getTxBody();
+            if (ctTextBody == null) {
+                // Shape without text, nothing to collect
+                continue;
+            }
+
+            DrawingTextBody textBody = new DrawingTextBody(ctTextBody);
+            out.addAll(Arrays.asList(textBody.getParagraphs()));
+        }
+
+        for (CTGraphicalObjectFrame frame : gs.getGraphicFrameArray()) {
+            // Named graphicData so it does not shadow the 'data' field
+            CTGraphicalObjectData graphicData = frame.getGraphic().getGraphicData();
+            XmlCursor c = graphicData.newCursor();
+            try {
+                c.selectPath("./*");
+
+                while (c.toNextSelection()) {
+                    XmlObject o = c.getObject();
+
+                    // Only tables are of interest; other payloads are skipped
+                    if (o instanceof CTTable) {
+                        DrawingTable table = new DrawingTable((CTTable) o);
+
+                        for (DrawingTableRow row : table.getRows()) {
+                            for (DrawingTableCell cell : row.getCells()) {
+                                DrawingTextBody textBody = cell.getTextBody();
+
+                                out.addAll(Arrays.asList(textBody.getParagraphs()));
+                            }
+                        }
+                    }
+                }
+            } finally {
+                // Release the cursor even if traversal fails part-way
+                c.dispose();
+            }
+        }
+
+        return out;
+    }
+
+}
public class XSLFSlide extends XSLFSheet implements Slide {
private CTSlide slide;
private CTSlideIdListEntry slideId;
+ private XSLFCommonSlideData data;
public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) {
super(parent);
this.slide = slide;
this.slideId = slideId;
+ this.data = new XSLFCommonSlideData(slide.getCSld());
}
/**
// TODO Auto-generated method stub
}
+
+ /**
+  * @return the wrapper around this slide's common slide data that was
+  *         created in the constructor
+  */
+ public XSLFCommonSlideData getCommonSlideData() {
+ return data;
+ }
}
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
}
+
+ /**
+  * Extracts the text of the sample file present1.pptx and checks that it
+  * is non-empty and contains the word "TEST".
+  */
+ public void testTable() throws Exception {
+ POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
+ xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
+ XSLFPowerPointExtractor extractor =
+ new XSLFPowerPointExtractor(xmlA);
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check the expected word (presumably from the table in the sample file) is there
+ assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+ }
}